{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "import gym, random, pickle, os.path, math, glob\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "\n",
    "import torch\n",
    "import torch.optim as optim\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.autograd as autograd\n",
    "import pdb\n",
    "\n",
    "from atari_wrappers import make_atari, wrap_deepmind,LazyFrames\n",
    "from IPython.display import clear_output\n",
    "from tensorboardX import SummaryWriter\n",
    "\n",
    "USE_CUDA = torch.cuda.is_available()\n",
    "# Tensor type matching the device selected above.\n",
    "dtype = torch.cuda.FloatTensor if USE_CUDA else torch.FloatTensor\n",
    "\n",
    "\n",
    "def Variable(*args, **kwargs):\n",
    "    \"\"\"Build an autograd.Variable from the arguments and move it to the GPU when USE_CUDA is set.\"\"\"\n",
    "    wrapped = autograd.Variable(*args, **kwargs)\n",
    "    return wrapped.cuda() if USE_CUDA else wrapped"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.image.AxesImage at 0x19adc44e848>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD7CAYAAACscuKmAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAASCElEQVR4nO3dW4wkZ3nG8f9T1d0zO7Oz3oNtYu9aNkjIgEC2yQZMHEWAcWQIgigSyI6IUITiXJDEREgEJxeIO19ECKQQohWHOOEcAwFZCLA4iERCjo3tEGBtzMGx16ddFhvv7uxMd1e9uegaPF7P7NT0zHR3zff8pNF0Vfd0fd/WPl3VdfheRQRmtv1l426AmY2Gw26WCIfdLBEOu1kiHHazRDjsZonYUNglXSvpfkk/lfTezWqUmW0+DXueXVIO/AS4BjgC3AlcHxE/3rzmmdlmaW3gb18B/DQifg4g6bPAm4FVw97RVEwzu+Ybq9MmWjlIoA20cLNIBEAGRVtEPuTblJD1QBFQVr9tayxbZ2VblImss4WFp+j2Tq2Ymo2EfT/w8LLpI8Arz/YH08zySl191jdVq0W+/0KKfXNEnlF2csjHm/jIRJmLspNx8sIWi3uGa0/7ZDD7REm+UJJ1S/JeuckttSXPWmcXbMI6W2zGOrvzrg+v+txGwr7Sv95zPvYk3QDcADCdz9Haf+Ds75pnlLtmKDutwRI0/k17SJCJyER/h+juGvbTXZTHRZZX7ydN/JaiqZavs2KH6M0N+06i/JXI+s1fZxsJ+xHgomXTB4BHz3xRRBwCDgHsmtsf3YvPXfudpYk7TxCZiBb0Z6F3znCf7ipzig5kPZH1xv8htt0trbPeLHR3D7nOIts262wjYb8TeKGk5wOPANcBf7LmX415l3xoEagQWRfy+WzNYwnRCsp2VHsn1Uw1c4vQeGLwb7+0HmIQXp0R3sih7MQz60kQDf3vupKhwx4RfUl/CXwdyIGPR8SPNq1lE0RlDA7SFAXnPChmnljjf4Dg9LkZ8xcOAh95TNyeStJCTB/NmH00UPnMB/BgnUG0oGyx7T6cN7JlJyK+Cnx1k9oysRSBioAC8uNdptd4fUiU+RSnnwfKB7uTKxzOsDFRQPsE7HykS1YMdu9DomxNsXA+FNnge/l2W2MbCnsqYukgoaCYytc89RYSxRSTcdrQrOKw1yUoc9HbmdGdXXufvLdTRBZExrbbHbRmcthrWjqVU7SrrfYayvb2Orhjzeew1yGIlijbYmFvxsJaZw8FxVQQ7SCy8O68TQSHvYbQ4OKMsi26u2Dx3MIBtsZx2IdxRtCzBZF1teZX89YpkfWCrB/Q0KuwrLkc9o0qYep4xszjgda4SKu1WNI5UZB1S9SAmypse3HYN0HWg6kT5Zphz3rhoE+Kau8sJuDei1Fx2C1JkUMxnT3rA7psVWdQtumpUofdklS2oL/jjLB32NaXNTvsmyBa0NuRrblrnncHB+jol6gYUePsOUKDYPdmsmeujRcUHRHavqdKHfaNEizuDfo7lt/etrLO0zD3MLTmBd3BYAj+7j4GChb3BMX0s9dZMRWUne171aPDvlGCYqakmKnz2oziiere6P423Xw0wXrW2TbisNekMlBftE9C/8nhvti1T/g8+ygtrbPWKeg8New6G5xt2Q7rzGGvQWVAHzKVTP+qHHrEktbpoD1f/ibw3oXfOl5nzzXSsEcmiqkhh/kco6jGwlu6tXWt8+lnU7YEAaGMaOqoPQ2Q6jobjJ2wspGGvb9D/PJlnVEuctNEdSynbA9+hqEC5nv54D9ebMtjQBMlxXXWv3cDYZf0ceCNwNGIeGk1by/wOeAS4EHgrRHx5FrvVXbg5MU+52S2VcqzbEvrbNn/BfhH4F+XzXsv8M2IuLkq+/Re4G/XeqOsUzB70YkaizSzYWSd1Tema4Y9Ir4r6ZIzZr8ZeHX1+BbgO9QI+8Uzx/nnyz651svMbEh/
PnN81eeG/c7+vIh4DCAiHpN0fp0/2ilx1fQ2vh7RbMx2nuXGni1PnqQbJN0l6a5jx/193Wxchg37E5IuAKh+H13thRFxKCIORsTB8/Y177Sb2XYxbNi/Ary9evx24Mub0xwz2yprhl3SZ4DvAZdKOiLpHcDNwDWSHmBQn/3mrW2mmW1UnaPx16/y1NlrL5vZRBnpFXRPlRlfOZXYrUZmI/RUufrO+kjDfrQ7xz89/JpRLtIsKUe7j6/63EjD3ityHvn1OaNcpFlSesXqZ7xGe4vryRz95+6RLtIsKScnJOzt+eD8uxdGuUizpDw0v/pteaPdskezb/43m3hnyZcvVDdLhMNulgiH3SwRDrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRdYalukjStyUdlvQjSTdW8/dKul3SA9XvPVvfXDMbVp0tex94d0S8GLgSeKekl/BMVZgXAt+sps1sQq0Z9oh4LCLurh6fAA4D+xlUhbmletktwB9tURvNbBOs6zt7VQbqCuAOzqgKA6xYFWZ5kYhu79QGm2tmw6oddkk7gS8A74qIp+v+3fIiEZ327DBtNLNNUCvsktoMgv6piPhiNbt2VRgzG786R+MFfAw4HBEfWPaUq8KYNUidYamuAv4U+F9J91bz/o5BFZjPVxViHgLesiUtNLNNUacizH8Bq9WBdVUYs4bwFXRmiXDYzRLhsJslwmE3S4TDbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpaIOre4mtmQIhORDW4ajVyEQAEqAgCVgcoYSVscdrMtEhoEvWxnhKDsDKazfqAiUAlZt0Q47GaNV7ZE2RKRQzGVUeaQ9SHvgQpQHyhG0xaH3WyrCPozOd25jGIKFveIogOt09A+GWRdmDkGea8cSXPqjEE3Lem/Jf1PVRHm/dV8V4QxW0MxJbo7RXeXWNgXLJxfsLAv6J4jenOi7Kw2CNTmq3M0fhF4bURcBlwOXCvpSlwRxqxR6lSEiYg4WU22q5/AFWHMGqXuuPF5NbLsUeD2iHBFGLOGqRX2iCgi4nLgAPAKSS+tuwBXhDGbDOu6gi4ingK+A1yLK8KYNUqdo/HnSdpdPd4BvA64D1eEMWuUOufZLwBukZQz+HD4fETcJul7uCKMWWPUqQjzAwZlms+cfxxXhDFrDN/1ZpYIh90sEQ67WSIcdrNEOOxmifAtrmZbKO8G7fkgK6DoiHI+pzX/zC2uWW80A1eAw262dQJapwqyXlC2xNSvRZkPBq7IeoPhqPLTIxq5AofdbEtl/QAtDUP13GGpNJpxKwCH3WzLKAJKyHrlYKDJQiANBpgsYzDwZHg33mxbeNbosb3xtsVH480S4bCbJcJhN0uEw26WCIfdLBEOu1kiHHazRNQOezWc9D2SbqumXRHGrEHWs2W/ETi8bNoVYcwapG6RiAPAHwIfXTbbFWHMGqTulv2DwHuA5ZftuyKMWYPUGTf+jcDRiPj+MAtwRRizyVDnRpirgDdJegMwDeyS9EmqijAR8ZgrwphNvjpVXG+KiAMRcQlwHfCtiHgbrghj1igbOc9+M3CNpAeAa6ppM5tQ67qfPSK+w6CwoyvCmDWMr6AzS4TDbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslwmE3S0StkWokPQicAAqgHxEHJe0FPgdcAjwIvDUintyaZprZRq1ny/6aiLg8Ig5W064IY9YgG9mNd0UYswapG/YAviHp+5JuqOa5IoxZg9QdXfaqiHhU0vnA7ZLuq7uAiDgEHALYNbc/hmijmW2CWlv2iHi0+n0U+BLwCqqKMACuCGM2+erUepuVNLf0GPgD4Ie4IoxZo9TZjX8e8CVJS6//dER8TdKdwOclvQN4CHjL1jXTzDZqzbBHxM+By1aY74owZg3iK+jMEuGwmyXCYTdLhMNulgiH3SwR
DrtZIhx2s0Q47GaJcNjNEuGwmyXCYTdLhMNulgiH3SwRDrtZIhx2s0Q47GaJcNjNElEr7JJ2S7pV0n2SDkt6laS9km6X9ED1e89WN9bMhld3y/4h4GsR8SIGQ1QdxhVhzBqlzuiyu4DfBz4GEBHdiHgKV4Qxa5Q6W/YXAMeAT0i6R9JHqyGlXRHGrEHqhL0FvBz4SERcAZxiHbvsEXEoIg5GxMFOe3bIZprZRtUJ+xHgSETcUU3fyiD8rghj1iBrhj0iHgcelnRpNetq4Me4IoxZo9Qt7PhXwKckdYCfA3/G4IPCFWHMGqJW2CPiXuDgCk+5IoxZQ/gKOrNEOOxmiXDYzRLhsJslwmE3S4TDbpYIh90sEQ67WSIcdrNEOOxmiXDYzRLhsJslwmE3S4TDbpYIh90sEQ67WSLqDCV9qaR7l/08LeldLhJh1ix1xqC7PyIuj4jLgd8G5oEv4SIRZo2y3t34q4GfRcT/4SIRZo2y3rBfB3ymelyrSISZTYbaYa9Gln0T8O/rWYArwphNhvVs2V8P3B0RT1TTtYpEuCKM2WRYT9iv55ldeHCRCLNGqTVuvKQZ4BrgL5bNvpkxF4ko84yykxEZRC4iBwKybqAyyPpB1i9H3SyziVS3SMQ8sO+MeccZc5GIYipjcXdO2RL9HVBMiawXdE4EeQ/apwp0KlDEOJtpNhHqln+aTBmULVG2B0Evdgy28OUCKIKypXG30GxiNDrsZVv0Z6DoiN4c9GeDfEFkXVHmkC8KBHjDbtbwsOeiPy2KaejNBf2dJWUrIz8tkCjb3rKbLdleN8Ko+jGz59heYTezVTnsZolw2M0S4bCbJcJhN0uEw26WCIfdLBEjvaim6GScODC1ae+3uDtjcV9QdKB/ToFm+/TbOYv9Fv1FUbYyylbHV9BZMoofrL79HmnY+zvg+Ms276qXYrZAe7rkrYK9swvMTXWZ77V5cm6WxV7GwqkW8ydyh92S0f/26s+N9nLZPOjvLjbt7TTTZ+fOBTqtgt07TjPXXmC61aFfZHT7LU7nQT8PCF9WZ4nIV9+yjTTsu2fn+ePfuWvT3m9H3uPc9gnaKpjLTjOd9ehFi1/1d9KLnBPFNE/3pzdteWaT7tOz86s+N9KwH2if4ubfunPT3i9b9UL4Y795VHof3hLy3fbq4zyONOxCtJWPcpGMdmlm46Wz3AlW69SbpL+R9CNJP5T0GUnTrghj1ix1yj/tB/4aOBgRL2WwsbwOV4Qxa5S6F9W0gB2SWsAM8CiuCGPWKHVqvT0C/AODEWQfA34dEd/AFWHMGqXObvweBlvx5wMXArOS3lZ3Acsrwhw7vnnn2M1sfersxr8O+EVEHIuIHvBF4HcZoiLMeft8bNxsXOqE/SHgSkkzksRgrPjDuCKMWaOseZ49Iu6QdCtwN9AH7gEOATsZc0UYM6uvbkWY9wHvO2P2ImOuCGNm9fl+drNEOOxmiXDYzRLhsJslQjHCcsaSjgGngF+ObKFb71zcn0m2nfpTpy8XR8R5Kz0x0rADSLorIg6OdKFbyP2ZbNupPxvti3fjzRLhsJslYhxhPzSGZW4l92eybaf+bKgvI//Obmbj4d14s0SMNOySrpV0v6SfSmrUMFaSLpL0bUmHq/H4bqzmN3osPkm5pHsk3VZNN7Y/knZLulXSfdV6elXD+7OpYz+OLOyScuDDwOuBlwDXS3rJqJa/CfrAuyPixcCVwDur9jd9LL4bGdyyvKTJ/fkQ8LWIeBFwGYN+NbI/WzL2Y0SM5Ad4FfD1ZdM3ATeNavlb0J8vA9cA9wMXVPMuAO4fd9vW0YcD1X+Y1wK3VfMa2R9gF/ALquNQy+Y3tT/7gYeBvQzuTr0N+ION9GeUu/FLjV9ypJrXOJIuAa4A7qDZY/F9EHgPUC6b19T+vIBBdZBPVF9LPipplob2J7Zg7MdRhn2l0esbdypA0k7gC8C7IuLpcbdnWJLeCByNiO+Puy2bpAW8HPhIRFzB4LLsRuyy
r2SjYz+uZJRhPwJctGz6AIMhqRtDUptB0D8VEV+sZtcai28CXQW8SdKDwGeB10r6JM3tzxHgSETcUU3fyiD8Te3PhsZ+XMkow34n8EJJz5fUYXCw4SsjXP6GVOPvfQw4HBEfWPZUI8fii4ibIuJARFzCYF18KyLeRnP78zjwsKRLq1lXAz+mof1hK8Z+HPFBhzcAPwF+Bvz9uA+CrLPtv8fga8cPgHurnzcA+xgc5Hqg+r133G0dom+v5pkDdI3tD3A5cFe1jv4D2NPw/rwfuA/4IfBvwNRG+uMr6MwS4SvozBLhsJslwmE3S4TDbpYIh90sEQ67WSIcdrNEOOxmifh/6EGWy7EooBEAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Create and wrap the Pong environment.\n",
    "env = make_atari('PongNoFrameskip-v4') # make_atari is meant for NoFrameskip environment ids only\n",
    "# scale=False: the wrapper is asked not to rescale frames (DQNAgent.observe divides by 255 itself);\n",
    "# frame_stack=True: observations are stacks of frames (presumably LazyFrames - TODO confirm against atari_wrappers).\n",
    "env = wrap_deepmind(env, scale = False, frame_stack=True )\n",
    "n_actions = env.action_space.n  # number of discrete actions\n",
    "state_dim = env.observation_space.shape  # shape of one wrapped observation\n",
    "\n",
    "# Sanity check: reset, take 100 random actions, then display the final observation.\n",
    "# env.render()\n",
    "test = env.reset()\n",
    "for i in range(100):\n",
    "    # env.step(...) returns a tuple; element 0 is the next observation.\n",
    "    test = env.step(env.action_space.sample())[0]\n",
    "\n",
    "# Show index 0 along the last axis of the observation (assumed to be one frame of the stack - TODO confirm).\n",
    "plt.imshow(test._force()[...,0])\n",
    "\n",
    "# Disabled alternatives kept for debugging: show the raw RGB render / close the environment.\n",
    "#plt.imshow(env.render(\"rgb_array\"))\n",
    "# env.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQN(nn.Module):\n",
    "    def __init__(self, in_channels=4, num_actions=5):\n",
    "        \"\"\"\n",
    "        Build the convolutional Q-network described in\n",
    "        https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf\n",
    "        Arguments:\n",
    "            in_channels: channel count of the input,\n",
    "                i.e. how many of the most recent frames are stacked together.\n",
    "            num_actions: length of the output vector, one Q-value per game action.\n",
    "        \"\"\"\n",
    "        super().__init__()\n",
    "        # Feature extractor: three strided convolutions.\n",
    "        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)\n",
    "        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)\n",
    "        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)\n",
    "        # Head: 7 * 7 * 64 is the flattened conv output size for 84x84 input frames.\n",
    "        self.fc4 = nn.Linear(7 * 7 * 64, 512)\n",
    "        self.fc5 = nn.Linear(512, num_actions)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Map a batch of stacked frames to one Q-value per action.\"\"\"\n",
    "        features = x\n",
    "        for conv in (self.conv1, self.conv2, self.conv3):\n",
    "            features = F.relu(conv(features))\n",
    "        # Keep the batch axis and flatten everything else for the linear head.\n",
    "        flat = features.reshape(features.size(0), -1)\n",
    "        hidden = F.relu(self.fc4(flat))\n",
    "        return self.fc5(hidden)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Memory_Buffer(object):\n",
    "    \"\"\"\n",
    "    Fixed-capacity replay buffer that overwrites its oldest entry once full.\n",
    "    Transitions are kept as (state, action, reward, next_state, done) tuples in self.buffer;\n",
    "    self.next_idx is the slot the next push overwrites after the buffer has filled up.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, memory_size=100000):\n",
    "        \"\"\"\n",
    "        Arguments:\n",
    "            memory_size: maximum number of transitions kept; must be positive.\n",
    "        Raises:\n",
    "            ValueError: if memory_size is not positive.\n",
    "        \"\"\"\n",
    "        if memory_size <= 0:\n",
    "            raise ValueError(\"memory_size must be positive, got {}\".format(memory_size))\n",
    "        self.buffer = []\n",
    "        self.memory_size = memory_size\n",
    "        self.next_idx = 0\n",
    "\n",
    "    def push(self, state, action, reward, next_state, done):\n",
    "        \"\"\"Store one transition, replacing the oldest one when the buffer is full.\"\"\"\n",
    "        data = (state, action, reward, next_state, done)\n",
    "        # Strict '<': with '<=' the list grew to memory_size + 1 entries and the extra\n",
    "        # slot was never overwritten, because next_idx wraps at memory_size.\n",
    "        if len(self.buffer) < self.memory_size: # buffer not full\n",
    "            self.buffer.append(data)\n",
    "        else: # buffer is full\n",
    "            self.buffer[self.next_idx] = data\n",
    "        self.next_idx = (self.next_idx + 1) % self.memory_size\n",
    "\n",
    "    def sample(self, batch_size):\n",
    "        \"\"\"\n",
    "        Draw batch_size transitions uniformly at random, with replacement.\n",
    "        Returns:\n",
    "            (states, actions, rewards, next_states, dones): states and next_states are joined\n",
    "            with np.concatenate along their first axis, the other three are plain lists.\n",
    "        Raises:\n",
    "            ValueError: if the buffer is empty or batch_size is not positive.\n",
    "        \"\"\"\n",
    "        if not self.buffer:\n",
    "            raise ValueError(\"cannot sample from an empty buffer\")\n",
    "        if batch_size <= 0:\n",
    "            raise ValueError(\"batch_size must be positive, got {}\".format(batch_size))\n",
    "        last = self.size() - 1\n",
    "        batch = [self.buffer[random.randint(0, last)] for _ in range(batch_size)]\n",
    "        # Transpose the list of transitions into one list per field.\n",
    "        states, actions, rewards, next_states, dones = map(list, zip(*batch))\n",
    "        return np.concatenate(states), actions, rewards, np.concatenate(next_states), dones\n",
    "\n",
    "    def size(self):\n",
    "        \"\"\"Return the number of transitions currently stored.\"\"\"\n",
    "        return len(self.buffer)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNAgent:\n",
    "    \"\"\"\n",
    "    DQN agent bundling an online network, a target network, a replay buffer and an RMSprop optimizer.\n",
    "    The target network is synchronised only once, at construction; refreshing it later is left to the caller.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, in_channels=1, action_space=[], USE_CUDA=False, memory_size=10000, epsilon=1, lr=1e-4):\n",
    "        \"\"\"\n",
    "        Arguments:\n",
    "            in_channels: number of input channels for both networks.\n",
    "            action_space: object exposing `.n`, the number of discrete actions.\n",
    "                The `[]` default is a placeholder only (it has no `.n`); always pass a real space.\n",
    "            USE_CUDA: when True, networks and tensors are moved to the GPU.\n",
    "            memory_size: capacity passed to Memory_Buffer.\n",
    "            epsilon: default exploration probability used by act().\n",
    "            lr: learning rate of the RMSprop optimizer.\n",
    "        \"\"\"\n",
    "        self.epsilon = epsilon\n",
    "        self.action_space = action_space\n",
    "        self.memory_buffer = Memory_Buffer(memory_size)\n",
    "        self.DQN = DQN(in_channels=in_channels, num_actions=action_space.n)\n",
    "        self.DQN_target = DQN(in_channels=in_channels, num_actions=action_space.n)\n",
    "        # Start the target network as an exact copy of the online network.\n",
    "        self.DQN_target.load_state_dict(self.DQN.state_dict())\n",
    "\n",
    "        self.USE_CUDA = USE_CUDA\n",
    "        if USE_CUDA:\n",
    "            self.DQN = self.DQN.cuda()\n",
    "            self.DQN_target = self.DQN_target.cuda()\n",
    "        self.optimizer = optim.RMSprop(self.DQN.parameters(), lr=lr, eps=0.001, alpha=0.95)\n",
    "\n",
    "    def observe(self, lazyframe):\n",
    "        \"\"\"\n",
    "        Convert one stacked observation into a float tensor for the network.\n",
    "        The last axis of the forced array is moved to the front, a leading batch axis is added\n",
    "        and values are divided by 255 (assumes an (H, W, C) array of 0-255 pixels - TODO confirm).\n",
    "        \"\"\"\n",
    "        state = torch.from_numpy(lazyframe._force().transpose(2, 0, 1)[None] / 255).float()\n",
    "        if self.USE_CUDA:\n",
    "            state = state.cuda()\n",
    "        return state\n",
    "\n",
    "    def value(self, state):\n",
    "        \"\"\"Return the online network's Q-values for a batch of states.\"\"\"\n",
    "        return self.DQN(state)\n",
    "\n",
    "    def act(self, state, epsilon=None):\n",
    "        \"\"\"\n",
    "        Pick an action with an epsilon-greedy policy.\n",
    "        With probability epsilon a uniformly random action is returned, otherwise the action\n",
    "        with the highest Q(s, a) for the first state in the batch.\n",
    "        Arguments:\n",
    "            state: batched network input.\n",
    "            epsilon: exploration probability; falls back to self.epsilon when None.\n",
    "        \"\"\"\n",
    "        if epsilon is None:\n",
    "            epsilon = self.epsilon\n",
    "\n",
    "        if random.random() < epsilon:\n",
    "            return random.randrange(self.action_space.n)\n",
    "        # Greedy branch only: exploratory steps no longer pay for a forward pass,\n",
    "        # and action selection never builds an autograd graph.\n",
    "        with torch.no_grad():\n",
    "            q_values = self.value(state).detach().cpu().numpy()\n",
    "        return q_values.argmax(1)[0]\n",
    "\n",
    "    def compute_td_loss(self, states, actions, rewards, next_states, is_done, gamma=0.99):\n",
    "        \"\"\"\n",
    "        Compute the smooth-L1 TD loss for a batch of transitions.\n",
    "        The target is r + gamma * max_a' Q_target(s', a'), or just r for terminal transitions.\n",
    "        Arguments:\n",
    "            states, next_states: batched network inputs (first axis is the batch).\n",
    "            actions, rewards, is_done: per-transition sequences of length batch_size.\n",
    "            gamma: discount factor.\n",
    "        Returns:\n",
    "            scalar loss tensor; gradients flow through the online network only.\n",
    "        \"\"\"\n",
    "        actions = torch.tensor(actions).long()    # shape: [batch_size]\n",
    "        rewards = torch.tensor(rewards, dtype=torch.float)  # shape: [batch_size]\n",
    "        is_done = torch.tensor(is_done).bool()  # shape: [batch_size]\n",
    "\n",
    "        if self.USE_CUDA:\n",
    "            actions = actions.cuda()\n",
    "            rewards = rewards.cuda()\n",
    "            is_done = is_done.cuda()\n",
    "\n",
    "        # Q-values of the actions that were actually taken.\n",
    "        predicted_qvalues = self.DQN(states)\n",
    "        predicted_qvalues_for_actions = predicted_qvalues[range(states.shape[0]), actions]\n",
    "\n",
    "        # The target is a constant w.r.t. the optimisation, so no graph is built for it.\n",
    "        with torch.no_grad():\n",
    "            next_state_values = self.DQN_target(next_states).max(-1)[0]\n",
    "            target_qvalues_for_actions = rewards + gamma * next_state_values\n",
    "            # At a terminal state s' does not exist, so Q(s, a) = r(s, a).\n",
    "            target_qvalues_for_actions = torch.where(\n",
    "                is_done, rewards, target_qvalues_for_actions)\n",
    "\n",
    "        return F.smooth_l1_loss(predicted_qvalues_for_actions, target_qvalues_for_actions)\n",
    "\n",
    "    def sample_from_buffer(self, batch_size):\n",
    "        \"\"\"\n",
    "        Draw batch_size transitions uniformly (with replacement) from the replay buffer.\n",
    "        Stored frames are converted with observe(); states and next_states are returned as\n",
    "        tensors concatenated along the batch axis, the other fields as plain lists.\n",
    "        \"\"\"\n",
    "        states, actions, rewards, next_states, dones = [], [], [], [], []\n",
    "        for _ in range(batch_size):\n",
    "            idx = random.randint(0, self.memory_buffer.size() - 1)\n",
    "            frame, action, reward, next_frame, done = self.memory_buffer.buffer[idx]\n",
    "            states.append(self.observe(frame))\n",
    "            actions.append(action)\n",
    "            rewards.append(reward)\n",
    "            next_states.append(self.observe(next_frame))\n",
    "            dones.append(done)\n",
    "        return torch.cat(states), actions, rewards, torch.cat(next_states), dones\n",
    "\n",
    "    def learn_from_experience(self, batch_size):\n",
    "        \"\"\"\n",
    "        Run one optimisation step on a sampled batch.\n",
    "        Returns the TD loss as a float, or 0 (no update) while the buffer does not yet hold\n",
    "        more than batch_size transitions.\n",
    "        \"\"\"\n",
    "        if self.memory_buffer.size() <= batch_size:\n",
    "            return 0\n",
    "        states, actions, rewards, next_states, dones = self.sample_from_buffer(batch_size)\n",
    "        td_loss = self.compute_td_loss(states, actions, rewards, next_states, dones)\n",
    "        self.optimizer.zero_grad()\n",
    "        td_loss.backward()\n",
    "        # Clamp every gradient element to [-1, 1]; parameters without a gradient are skipped.\n",
    "        nn.utils.clip_grad_value_(self.DQN.parameters(), 1)\n",
    "        self.optimizer.step()\n",
    "        return td_loss.item()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\Softwares\\anaconda3\\envs\\py37\\lib\\site-packages\\numpy\\core\\fromnumeric.py:3257: RuntimeWarning: Mean of empty slice.\n",
      "  out=out, **kwargs)\n",
      "D:\\Softwares\\anaconda3\\envs\\py37\\lib\\site-packages\\numpy\\core\\_methods.py:161: RuntimeWarning: invalid value encountered in double_scalars\n",
      "  ret = ret.dtype.type(ret / rcount)\n",
      "WARNING:root:NaN or Inf found in input tensor.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames:     0, reward:   nan, loss: 0.000000, epsilon: 1.000000, episode:    0\n",
      "frames:  1000, reward: -20.000000, loss: 0.000000, epsilon: 0.967544, episode:    1\n",
      "frames:  2000, reward: -20.500000, loss: 0.000000, epsilon: 0.936152, episode:    2\n",
      "frames:  3000, reward: -20.666667, loss: 0.000000, epsilon: 0.905789, episode:    3\n",
      "frames:  4000, reward: -20.500000, loss: 0.000000, epsilon: 0.876422, episode:    4\n",
      "frames:  5000, reward: -20.600000, loss: 0.000000, epsilon: 0.848017, episode:    5\n",
      "frames:  6000, reward: -20.714286, loss: 0.000000, epsilon: 0.820543, episode:    7\n",
      "frames:  7000, reward: -20.750000, loss: 0.000000, epsilon: 0.793971, episode:    8\n",
      "frames:  8000, reward: -20.777778, loss: 0.000000, epsilon: 0.768269, episode:    9\n",
      "frames:  9000, reward: -20.900000, loss: 0.000000, epsilon: 0.743410, episode:   11\n",
      "frames: 10000, reward: -20.900000, loss: 0.016001, epsilon: 0.719366, episode:   12\n",
      "frames: 11000, reward: -20.900000, loss: 0.000458, epsilon: 0.696110, episode:   13\n",
      "frames: 12000, reward: -21.000000, loss: 0.014937, epsilon: 0.673617, episode:   14\n",
      "frames: 13000, reward: -20.800000, loss: 0.015661, epsilon: 0.651861, episode:   16\n",
      "frames: 14000, reward: -20.600000, loss: 0.030705, epsilon: 0.630818, episode:   17\n",
      "frames: 15000, reward: -20.600000, loss: 0.000392, epsilon: 0.610465, episode:   18\n",
      "frames: 16000, reward: -20.600000, loss: 0.015457, epsilon: 0.590780, episode:   19\n",
      "frames: 17000, reward: -20.600000, loss: 0.015217, epsilon: 0.571740, episode:   20\n",
      "frames: 18000, reward: -20.500000, loss: 0.045171, epsilon: 0.553324, episode:   21\n",
      "frames: 19000, reward: -20.400000, loss: 0.015198, epsilon: 0.535511, episode:   23\n",
      "frames: 20000, reward: -20.400000, loss: 0.000348, epsilon: 0.518283, episode:   23\n",
      "frames: 21000, reward: -20.400000, loss: 0.014947, epsilon: 0.501619, episode:   25\n",
      "frames: 22000, reward: -20.400000, loss: 0.015176, epsilon: 0.485502, episode:   26\n",
      "frames: 23000, reward: -20.600000, loss: 0.000305, epsilon: 0.469913, episode:   27\n",
      "frames: 24000, reward: -20.600000, loss: 0.029793, epsilon: 0.454836, episode:   28\n",
      "frames: 25000, reward: -20.400000, loss: 0.045278, epsilon: 0.440252, episode:   29\n",
      "frames: 26000, reward: -20.300000, loss: 0.011359, epsilon: 0.426147, episode:   30\n",
      "frames: 27000, reward: -20.400000, loss: 0.000449, epsilon: 0.412504, episode:   32\n",
      "frames: 28000, reward: -20.400000, loss: 0.014832, epsilon: 0.399308, episode:   33\n",
      "frames: 29000, reward: -20.500000, loss: 0.000211, epsilon: 0.386545, episode:   34\n",
      "frames: 30000, reward: -20.600000, loss: 0.000246, epsilon: 0.374201, episode:   35\n",
      "frames: 31000, reward: -20.500000, loss: 0.014837, epsilon: 0.362261, episode:   36\n",
      "frames: 32000, reward: -20.400000, loss: 0.000271, epsilon: 0.350712, episode:   37\n",
      "frames: 33000, reward: -20.400000, loss: 0.015252, epsilon: 0.339542, episode:   38\n",
      "frames: 34000, reward: -20.600000, loss: 0.000250, epsilon: 0.328739, episode:   39\n",
      "frames: 35000, reward: -20.700000, loss: 0.014872, epsilon: 0.318289, episode:   41\n",
      "frames: 36000, reward: -20.700000, loss: 0.015330, epsilon: 0.308182, episode:   42\n",
      "frames: 37000, reward: -20.600000, loss: 0.000644, epsilon: 0.298407, episode:   43\n",
      "frames: 38000, reward: -20.600000, loss: 0.015158, epsilon: 0.288952, episode:   44\n",
      "frames: 39000, reward: -20.400000, loss: 0.000163, epsilon: 0.279806, episode:   45\n",
      "frames: 40000, reward: -20.500000, loss: 0.025770, epsilon: 0.270961, episode:   46\n",
      "frames: 41000, reward: -20.600000, loss: 0.010703, epsilon: 0.262406, episode:   47\n",
      "frames: 42000, reward: -20.600000, loss: 0.015089, epsilon: 0.254131, episode:   48\n",
      "frames: 43000, reward: -20.400000, loss: 0.000990, epsilon: 0.246127, episode:   50\n",
      "frames: 44000, reward: -20.300000, loss: 0.013049, epsilon: 0.238386, episode:   51\n",
      "frames: 45000, reward: -20.200000, loss: 0.000637, epsilon: 0.230899, episode:   52\n",
      "frames: 46000, reward: -20.300000, loss: 0.000895, epsilon: 0.223657, episode:   53\n",
      "frames: 47000, reward: -20.200000, loss: 0.013070, epsilon: 0.216652, episode:   54\n",
      "frames: 48000, reward: -20.300000, loss: 0.002363, epsilon: 0.209878, episode:   55\n",
      "frames: 49000, reward: -20.300000, loss: 0.001125, epsilon: 0.203325, episode:   56\n",
      "frames: 50000, reward: -20.200000, loss: 0.007352, epsilon: 0.196987, episode:   58\n",
      "frames: 51000, reward: -20.400000, loss: 0.000742, epsilon: 0.190857, episode:   59\n",
      "frames: 52000, reward: -20.400000, loss: 0.002529, epsilon: 0.184928, episode:   60\n",
      "frames: 53000, reward: -20.500000, loss: 0.001952, epsilon: 0.179193, episode:   61\n",
      "frames: 54000, reward: -20.500000, loss: 0.000945, epsilon: 0.173646, episode:   62\n",
      "frames: 55000, reward: -20.600000, loss: 0.015237, epsilon: 0.168281, episode:   63\n",
      "frames: 56000, reward: -20.700000, loss: 0.001066, epsilon: 0.163092, episode:   64\n",
      "frames: 57000, reward: -20.800000, loss: 0.017346, epsilon: 0.158073, episode:   66\n",
      "frames: 58000, reward: -20.900000, loss: 0.001767, epsilon: 0.153219, episode:   67\n",
      "frames: 59000, reward: -20.900000, loss: 0.000954, epsilon: 0.148523, episode:   68\n",
      "frames: 60000, reward: -20.800000, loss: 0.003485, epsilon: 0.143982, episode:   69\n",
      "frames: 61000, reward: -20.800000, loss: 0.000457, epsilon: 0.139589, episode:   71\n",
      "frames: 62000, reward: -20.900000, loss: 0.002734, epsilon: 0.135341, episode:   72\n",
      "frames: 63000, reward: -20.800000, loss: 0.001159, epsilon: 0.131232, episode:   73\n",
      "frames: 64000, reward: -20.700000, loss: 0.003299, epsilon: 0.127257, episode:   74\n",
      "frames: 65000, reward: -20.500000, loss: 0.002378, epsilon: 0.123413, episode:   75\n",
      "frames: 66000, reward: -20.300000, loss: 0.002315, epsilon: 0.119695, episode:   76\n",
      "frames: 67000, reward: -20.200000, loss: 0.002955, epsilon: 0.116099, episode:   77\n",
      "frames: 68000, reward: -20.200000, loss: 0.001895, epsilon: 0.112621, episode:   79\n",
      "frames: 69000, reward: -20.200000, loss: 0.005554, epsilon: 0.109256, episode:   80\n",
      "frames: 70000, reward: -20.200000, loss: 0.000637, epsilon: 0.106002, episode:   81\n",
      "frames: 71000, reward: -20.100000, loss: 0.003437, epsilon: 0.102855, episode:   82\n",
      "frames: 72000, reward: -20.200000, loss: 0.003195, epsilon: 0.099811, episode:   83\n",
      "frames: 73000, reward: -20.500000, loss: 0.004946, epsilon: 0.096866, episode:   85\n",
      "frames: 74000, reward: -20.700000, loss: 0.003139, epsilon: 0.094019, episode:   86\n",
      "frames: 75000, reward: -20.700000, loss: 0.001204, epsilon: 0.091264, episode:   87\n",
      "frames: 76000, reward: -20.800000, loss: 0.000723, epsilon: 0.088600, episode:   88\n",
      "frames: 77000, reward: -20.800000, loss: 0.001031, epsilon: 0.086023, episode:   89\n",
      "frames: 78000, reward: -20.700000, loss: 0.002287, epsilon: 0.083531, episode:   91\n",
      "frames: 79000, reward: -20.800000, loss: 0.005193, epsilon: 0.081120, episode:   92\n",
      "frames: 80000, reward: -20.700000, loss: 0.000973, epsilon: 0.078789, episode:   93\n",
      "frames: 81000, reward: -20.600000, loss: 0.000938, epsilon: 0.076533, episode:   94\n",
      "frames: 82000, reward: -20.500000, loss: 0.004807, epsilon: 0.074352, episode:   96\n",
      "frames: 83000, reward: -20.600000, loss: 0.000338, epsilon: 0.072243, episode:   97\n",
      "frames: 84000, reward: -20.600000, loss: 0.002048, epsilon: 0.070202, episode:   98\n",
      "frames: 85000, reward: -20.500000, loss: 0.003008, epsilon: 0.068228, episode:   99\n",
      "frames: 86000, reward: -20.400000, loss: 0.002623, epsilon: 0.066319, episode:  100\n",
      "frames: 87000, reward: -20.400000, loss: 0.000763, epsilon: 0.064473, episode:  101\n",
      "frames: 88000, reward: -20.400000, loss: 0.001191, epsilon: 0.062687, episode:  103\n",
      "frames: 89000, reward: -20.400000, loss: 0.002462, epsilon: 0.060960, episode:  104\n",
      "frames: 90000, reward: -20.400000, loss: 0.003478, epsilon: 0.059289, episode:  105\n",
      "frames: 91000, reward: -20.400000, loss: 0.000823, epsilon: 0.057673, episode:  106\n",
      "frames: 92000, reward: -20.300000, loss: 0.004761, epsilon: 0.056110, episode:  108\n",
      "frames: 93000, reward: -20.400000, loss: 0.000644, epsilon: 0.054599, episode:  109\n",
      "frames: 94000, reward: -20.600000, loss: 0.002553, epsilon: 0.053137, episode:  110\n",
      "frames: 95000, reward: -20.600000, loss: 0.004584, epsilon: 0.051722, episode:  111\n",
      "frames: 96000, reward: -20.600000, loss: 0.003538, epsilon: 0.050355, episode:  112\n",
      "frames: 97000, reward: -20.700000, loss: 0.003512, epsilon: 0.049032, episode:  114\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 98000, reward: -20.600000, loss: 0.001436, epsilon: 0.047752, episode:  115\n",
      "frames: 99000, reward: -20.600000, loss: 0.000898, epsilon: 0.046514, episode:  116\n",
      "frames: 100000, reward: -20.700000, loss: 0.000720, epsilon: 0.045317, episode:  117\n",
      "frames: 101000, reward: -20.700000, loss: 0.000382, epsilon: 0.044159, episode:  118\n",
      "frames: 102000, reward: -20.700000, loss: 0.001637, epsilon: 0.043040, episode:  119\n",
      "frames: 103000, reward: -20.600000, loss: 0.003110, epsilon: 0.041956, episode:  120\n",
      "frames: 104000, reward: -20.400000, loss: 0.003492, epsilon: 0.040909, episode:  121\n",
      "frames: 105000, reward: -20.400000, loss: 0.001239, epsilon: 0.039895, episode:  122\n",
      "frames: 106000, reward: -20.300000, loss: 0.000485, epsilon: 0.038915, episode:  124\n",
      "frames: 107000, reward: -20.400000, loss: 0.003168, epsilon: 0.037967, episode:  125\n",
      "frames: 108000, reward: -20.400000, loss: 0.001665, epsilon: 0.037050, episode:  125\n",
      "frames: 109000, reward: -20.100000, loss: 0.001003, epsilon: 0.036164, episode:  126\n",
      "frames: 110000, reward: -20.100000, loss: 0.003477, epsilon: 0.035306, episode:  127\n",
      "frames: 111000, reward: -20.100000, loss: 0.001116, epsilon: 0.034476, episode:  128\n",
      "frames: 112000, reward: -20.000000, loss: 0.001492, epsilon: 0.033674, episode:  129\n",
      "frames: 113000, reward: -20.000000, loss: 0.006361, epsilon: 0.032898, episode:  130\n",
      "frames: 114000, reward: -19.900000, loss: 0.000855, epsilon: 0.032147, episode:  131\n",
      "frames: 115000, reward: -19.900000, loss: 0.001214, epsilon: 0.031421, episode:  132\n",
      "frames: 116000, reward: -19.800000, loss: 0.001302, epsilon: 0.030719, episode:  133\n",
      "frames: 117000, reward: -19.600000, loss: 0.001606, epsilon: 0.030039, episode:  134\n",
      "frames: 118000, reward: -19.600000, loss: 0.001083, epsilon: 0.029383, episode:  134\n",
      "frames: 119000, reward: -19.400000, loss: 0.003237, epsilon: 0.028747, episode:  135\n",
      "frames: 120000, reward: -19.700000, loss: 0.002889, epsilon: 0.028132, episode:  136\n",
      "frames: 121000, reward: -19.500000, loss: 0.003418, epsilon: 0.027538, episode:  137\n",
      "frames: 122000, reward: -19.400000, loss: 0.002449, epsilon: 0.026963, episode:  138\n",
      "frames: 123000, reward: -19.400000, loss: 0.002687, epsilon: 0.026407, episode:  139\n",
      "frames: 124000, reward: -19.400000, loss: 0.001058, epsilon: 0.025869, episode:  139\n",
      "frames: 125000, reward: -19.300000, loss: 0.003374, epsilon: 0.025349, episode:  140\n",
      "frames: 126000, reward: -19.400000, loss: 0.000631, epsilon: 0.024846, episode:  141\n",
      "frames: 127000, reward: -19.300000, loss: 0.002335, epsilon: 0.024359, episode:  142\n",
      "frames: 128000, reward: -19.500000, loss: 0.000795, epsilon: 0.023888, episode:  143\n",
      "frames: 129000, reward: -19.500000, loss: 0.000881, epsilon: 0.023433, episode:  143\n",
      "frames: 130000, reward: -19.600000, loss: 0.000690, epsilon: 0.022992, episode:  144\n",
      "frames: 131000, reward: -19.600000, loss: 0.002737, epsilon: 0.022567, episode:  145\n",
      "frames: 132000, reward: -19.600000, loss: 0.002355, epsilon: 0.022155, episode:  145\n",
      "frames: 133000, reward: -19.700000, loss: 0.001414, epsilon: 0.021756, episode:  146\n",
      "frames: 134000, reward: -19.900000, loss: 0.003316, epsilon: 0.021371, episode:  147\n",
      "frames: 135000, reward: -19.600000, loss: 0.000757, epsilon: 0.020998, episode:  148\n",
      "frames: 136000, reward: -19.600000, loss: 0.001451, epsilon: 0.020637, episode:  148\n",
      "frames: 137000, reward: -19.600000, loss: 0.006810, epsilon: 0.020289, episode:  149\n",
      "frames: 138000, reward: -19.300000, loss: 0.001290, epsilon: 0.019951, episode:  150\n",
      "frames: 139000, reward: -19.300000, loss: 0.000876, epsilon: 0.019625, episode:  151\n",
      "frames: 140000, reward: -19.300000, loss: 0.006425, epsilon: 0.019310, episode:  151\n",
      "frames: 141000, reward: -19.400000, loss: 0.001647, epsilon: 0.019004, episode:  152\n",
      "frames: 142000, reward: -19.400000, loss: 0.000653, epsilon: 0.018709, episode:  153\n",
      "frames: 143000, reward: -19.400000, loss: 0.020436, epsilon: 0.018424, episode:  154\n",
      "frames: 144000, reward: -19.400000, loss: 0.005036, epsilon: 0.018147, episode:  155\n",
      "frames: 145000, reward: -19.300000, loss: 0.000910, epsilon: 0.017880, episode:  156\n",
      "frames: 146000, reward: -19.200000, loss: 0.002417, epsilon: 0.017622, episode:  157\n",
      "frames: 147000, reward: -19.500000, loss: 0.002748, epsilon: 0.017372, episode:  158\n",
      "frames: 148000, reward: -19.400000, loss: 0.002467, epsilon: 0.017130, episode:  159\n",
      "frames: 149000, reward: -19.500000, loss: 0.002201, epsilon: 0.016897, episode:  160\n",
      "frames: 150000, reward: -19.400000, loss: 0.001024, epsilon: 0.016671, episode:  161\n",
      "frames: 151000, reward: -19.400000, loss: 0.003916, epsilon: 0.016452, episode:  161\n",
      "frames: 152000, reward: -19.500000, loss: 0.003319, epsilon: 0.016240, episode:  162\n",
      "frames: 153000, reward: -19.500000, loss: 0.000671, epsilon: 0.016036, episode:  163\n",
      "frames: 154000, reward: -19.500000, loss: 0.014444, epsilon: 0.015838, episode:  163\n",
      "frames: 155000, reward: -19.200000, loss: 0.002713, epsilon: 0.015647, episode:  164\n",
      "frames: 156000, reward: -19.300000, loss: 0.004755, epsilon: 0.015461, episode:  165\n",
      "frames: 157000, reward: -19.300000, loss: 0.002369, epsilon: 0.015282, episode:  165\n",
      "frames: 158000, reward: -19.000000, loss: 0.001745, epsilon: 0.015109, episode:  166\n",
      "frames: 159000, reward: -19.000000, loss: 0.001418, epsilon: 0.014942, episode:  167\n",
      "frames: 160000, reward: -19.000000, loss: 0.003575, epsilon: 0.014780, episode:  167\n",
      "frames: 161000, reward: -18.900000, loss: 0.000690, epsilon: 0.014623, episode:  168\n",
      "frames: 162000, reward: -18.400000, loss: 0.003265, epsilon: 0.014471, episode:  169\n",
      "frames: 163000, reward: -18.400000, loss: 0.001432, epsilon: 0.014325, episode:  169\n",
      "frames: 164000, reward: -18.500000, loss: 0.009079, epsilon: 0.014183, episode:  170\n",
      "frames: 165000, reward: -18.500000, loss: 0.000858, epsilon: 0.014046, episode:  171\n",
      "frames: 166000, reward: -18.500000, loss: 0.002047, epsilon: 0.013913, episode:  171\n",
      "frames: 167000, reward: -18.200000, loss: 0.001963, epsilon: 0.013785, episode:  172\n",
      "frames: 168000, reward: -17.700000, loss: 0.001830, epsilon: 0.013661, episode:  173\n",
      "frames: 169000, reward: -17.700000, loss: 0.002148, epsilon: 0.013541, episode:  173\n",
      "frames: 170000, reward: -17.300000, loss: 0.003559, epsilon: 0.013425, episode:  174\n",
      "frames: 171000, reward: -16.900000, loss: 0.002428, epsilon: 0.013313, episode:  175\n",
      "frames: 172000, reward: -16.900000, loss: 0.001504, epsilon: 0.013204, episode:  175\n",
      "frames: 173000, reward: -16.600000, loss: 0.005690, epsilon: 0.013099, episode:  176\n",
      "frames: 174000, reward: -16.600000, loss: 0.002404, epsilon: 0.012997, episode:  176\n",
      "frames: 175000, reward: -15.900000, loss: 0.003182, epsilon: 0.012899, episode:  177\n",
      "frames: 176000, reward: -15.900000, loss: 0.001487, epsilon: 0.012804, episode:  177\n",
      "frames: 177000, reward: -16.000000, loss: 0.002872, epsilon: 0.012712, episode:  178\n",
      "frames: 178000, reward: -16.500000, loss: 0.001898, epsilon: 0.012623, episode:  179\n",
      "frames: 179000, reward: -16.500000, loss: 0.001555, epsilon: 0.012537, episode:  179\n",
      "frames: 180000, reward: -16.000000, loss: 0.001289, epsilon: 0.012454, episode:  180\n",
      "frames: 181000, reward: -16.000000, loss: 0.002487, epsilon: 0.012374, episode:  180\n",
      "frames: 182000, reward: -15.800000, loss: 0.006833, epsilon: 0.012296, episode:  181\n",
      "frames: 183000, reward: -16.100000, loss: 0.001351, epsilon: 0.012220, episode:  182\n",
      "frames: 184000, reward: -16.100000, loss: 0.002600, epsilon: 0.012148, episode:  182\n",
      "frames: 185000, reward: -16.300000, loss: 0.002883, epsilon: 0.012077, episode:  183\n",
      "frames: 186000, reward: -16.300000, loss: 0.003550, epsilon: 0.012009, episode:  183\n",
      "frames: 187000, reward: -16.200000, loss: 0.006344, epsilon: 0.011943, episode:  184\n",
      "frames: 188000, reward: -16.500000, loss: 0.001619, epsilon: 0.011880, episode:  185\n",
      "frames: 189000, reward: -16.500000, loss: 0.004125, epsilon: 0.011818, episode:  185\n",
      "frames: 190000, reward: -16.700000, loss: 0.004698, epsilon: 0.011758, episode:  186\n",
      "frames: 191000, reward: -16.700000, loss: 0.002882, epsilon: 0.011701, episode:  186\n",
      "frames: 192000, reward: -17.200000, loss: 0.001728, epsilon: 0.011645, episode:  187\n",
      "frames: 193000, reward: -17.100000, loss: 0.002064, epsilon: 0.011591, episode:  188\n",
      "frames: 194000, reward: -17.100000, loss: 0.001000, epsilon: 0.011539, episode:  188\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 195000, reward: -16.400000, loss: 0.002594, epsilon: 0.011488, episode:  189\n",
      "frames: 196000, reward: -16.400000, loss: 0.002122, epsilon: 0.011440, episode:  189\n",
      "frames: 197000, reward: -16.200000, loss: 0.004345, epsilon: 0.011392, episode:  190\n",
      "frames: 198000, reward: -16.200000, loss: 0.004849, epsilon: 0.011347, episode:  190\n",
      "frames: 199000, reward: -15.900000, loss: 0.002643, epsilon: 0.011303, episode:  191\n",
      "frames: 200000, reward: -15.900000, loss: 0.001229, epsilon: 0.011260, episode:  191\n",
      "frames: 201000, reward: -14.800000, loss: 0.004481, epsilon: 0.011219, episode:  192\n",
      "frames: 202000, reward: -14.800000, loss: 0.002413, epsilon: 0.011179, episode:  192\n",
      "frames: 203000, reward: -13.800000, loss: 0.003865, epsilon: 0.011140, episode:  193\n",
      "frames: 204000, reward: -13.800000, loss: 0.006109, epsilon: 0.011103, episode:  193\n",
      "frames: 205000, reward: -13.600000, loss: 0.003437, epsilon: 0.011066, episode:  194\n",
      "frames: 206000, reward: -13.600000, loss: 0.006738, epsilon: 0.011032, episode:  194\n",
      "frames: 207000, reward: -13.700000, loss: 0.003224, epsilon: 0.010998, episode:  195\n",
      "frames: 208000, reward: -13.200000, loss: 0.004931, epsilon: 0.010965, episode:  196\n",
      "frames: 209000, reward: -13.200000, loss: 0.001520, epsilon: 0.010933, episode:  196\n",
      "frames: 210000, reward: -13.400000, loss: 0.002545, epsilon: 0.010903, episode:  197\n",
      "frames: 211000, reward: -13.400000, loss: 0.001538, epsilon: 0.010873, episode:  197\n",
      "frames: 212000, reward: -13.200000, loss: 0.001736, epsilon: 0.010845, episode:  198\n",
      "frames: 213000, reward: -13.200000, loss: 0.001490, epsilon: 0.010817, episode:  198\n",
      "frames: 214000, reward: -12.900000, loss: 0.001694, epsilon: 0.010790, episode:  199\n",
      "frames: 215000, reward: -12.900000, loss: 0.001337, epsilon: 0.010764, episode:  199\n",
      "frames: 216000, reward: -12.600000, loss: 0.002013, epsilon: 0.010739, episode:  200\n",
      "frames: 217000, reward: -13.000000, loss: 0.003288, epsilon: 0.010715, episode:  201\n",
      "frames: 218000, reward: -13.000000, loss: 0.000889, epsilon: 0.010691, episode:  201\n",
      "frames: 219000, reward: -13.900000, loss: 0.011693, epsilon: 0.010669, episode:  202\n",
      "frames: 220000, reward: -15.100000, loss: 0.003124, epsilon: 0.010647, episode:  203\n",
      "frames: 221000, reward: -15.100000, loss: 0.001557, epsilon: 0.010626, episode:  203\n",
      "frames: 222000, reward: -15.200000, loss: 0.002685, epsilon: 0.010605, episode:  204\n",
      "frames: 223000, reward: -15.200000, loss: 0.001570, epsilon: 0.010585, episode:  204\n",
      "frames: 224000, reward: -14.300000, loss: 0.002237, epsilon: 0.010566, episode:  205\n",
      "frames: 225000, reward: -14.300000, loss: 0.001554, epsilon: 0.010548, episode:  205\n",
      "frames: 226000, reward: -15.100000, loss: 0.001995, epsilon: 0.010530, episode:  206\n",
      "frames: 227000, reward: -15.100000, loss: 0.007631, epsilon: 0.010512, episode:  206\n",
      "frames: 228000, reward: -15.100000, loss: 0.002626, epsilon: 0.010495, episode:  206\n",
      "frames: 229000, reward: -13.500000, loss: 0.001464, epsilon: 0.010479, episode:  207\n",
      "frames: 230000, reward: -13.500000, loss: 0.001225, epsilon: 0.010463, episode:  207\n",
      "frames: 231000, reward: -13.400000, loss: 0.001333, epsilon: 0.010448, episode:  208\n",
      "frames: 232000, reward: -13.400000, loss: 0.002175, epsilon: 0.010434, episode:  208\n",
      "frames: 233000, reward: -13.400000, loss: 0.002915, epsilon: 0.010419, episode:  208\n",
      "frames: 234000, reward: -13.300000, loss: 0.001421, epsilon: 0.010406, episode:  209\n",
      "frames: 235000, reward: -13.300000, loss: 0.002757, epsilon: 0.010392, episode:  209\n",
      "frames: 236000, reward: -14.000000, loss: 0.005132, epsilon: 0.010379, episode:  210\n",
      "frames: 237000, reward: -14.000000, loss: 0.003761, epsilon: 0.010367, episode:  210\n",
      "frames: 238000, reward: -14.000000, loss: 0.002055, epsilon: 0.010355, episode:  210\n",
      "frames: 239000, reward: -13.100000, loss: 0.001445, epsilon: 0.010343, episode:  211\n",
      "frames: 240000, reward: -13.100000, loss: 0.003161, epsilon: 0.010332, episode:  211\n",
      "frames: 241000, reward: -11.600000, loss: 0.004630, epsilon: 0.010321, episode:  212\n",
      "frames: 242000, reward: -11.600000, loss: 0.006189, epsilon: 0.010311, episode:  212\n",
      "frames: 243000, reward: -11.400000, loss: 0.001512, epsilon: 0.010301, episode:  213\n",
      "frames: 244000, reward: -11.400000, loss: 0.001527, epsilon: 0.010291, episode:  213\n",
      "frames: 245000, reward: -10.900000, loss: 0.001786, epsilon: 0.010281, episode:  214\n",
      "frames: 246000, reward: -10.900000, loss: 0.003178, epsilon: 0.010272, episode:  214\n",
      "frames: 247000, reward: -11.400000, loss: 0.000817, epsilon: 0.010263, episode:  215\n",
      "frames: 248000, reward: -11.400000, loss: 0.002735, epsilon: 0.010254, episode:  215\n",
      "frames: 249000, reward: -10.500000, loss: 0.002879, epsilon: 0.010246, episode:  216\n",
      "frames: 250000, reward: -10.500000, loss: 0.001917, epsilon: 0.010238, episode:  216\n",
      "frames: 251000, reward: -10.800000, loss: 0.001566, epsilon: 0.010230, episode:  217\n",
      "frames: 252000, reward: -10.800000, loss: 0.002987, epsilon: 0.010223, episode:  217\n",
      "frames: 253000, reward: -10.800000, loss: 0.005235, epsilon: 0.010215, episode:  217\n",
      "frames: 254000, reward: -10.600000, loss: 0.001630, epsilon: 0.010208, episode:  218\n",
      "frames: 255000, reward: -10.600000, loss: 0.009278, epsilon: 0.010201, episode:  218\n",
      "frames: 256000, reward: -10.600000, loss: 0.003482, epsilon: 0.010195, episode:  218\n",
      "frames: 257000, reward: -10.300000, loss: 0.004704, epsilon: 0.010188, episode:  219\n",
      "frames: 258000, reward: -10.300000, loss: 0.001217, epsilon: 0.010182, episode:  219\n",
      "frames: 259000, reward: -9.700000, loss: 0.001532, epsilon: 0.010176, episode:  220\n",
      "frames: 260000, reward: -9.700000, loss: 0.002828, epsilon: 0.010171, episode:  220\n",
      "frames: 261000, reward: -9.700000, loss: 0.003590, epsilon: 0.010165, episode:  220\n",
      "frames: 262000, reward: -9.500000, loss: 0.002658, epsilon: 0.010160, episode:  221\n",
      "frames: 263000, reward: -9.500000, loss: 0.000656, epsilon: 0.010154, episode:  221\n",
      "frames: 264000, reward: -9.500000, loss: 0.002389, epsilon: 0.010149, episode:  221\n",
      "frames: 265000, reward: -9.700000, loss: 0.002733, epsilon: 0.010144, episode:  222\n",
      "frames: 266000, reward: -9.700000, loss: 0.001632, epsilon: 0.010140, episode:  222\n",
      "frames: 267000, reward: -9.700000, loss: 0.001537, epsilon: 0.010135, episode:  222\n",
      "frames: 268000, reward: -8.600000, loss: 0.002067, epsilon: 0.010131, episode:  223\n",
      "frames: 269000, reward: -8.600000, loss: 0.004557, epsilon: 0.010126, episode:  223\n",
      "frames: 270000, reward: -8.600000, loss: 0.000638, epsilon: 0.010122, episode:  223\n",
      "frames: 271000, reward: -8.500000, loss: 0.001286, epsilon: 0.010118, episode:  224\n",
      "frames: 272000, reward: -8.500000, loss: 0.004731, epsilon: 0.010114, episode:  224\n",
      "frames: 273000, reward: -8.500000, loss: 0.003743, epsilon: 0.010111, episode:  224\n",
      "frames: 274000, reward: -7.500000, loss: 0.002056, epsilon: 0.010107, episode:  225\n",
      "frames: 275000, reward: -7.500000, loss: 0.003639, epsilon: 0.010103, episode:  225\n",
      "frames: 276000, reward: -7.500000, loss: 0.004115, epsilon: 0.010100, episode:  225\n",
      "frames: 277000, reward: -7.100000, loss: 0.000868, epsilon: 0.010097, episode:  226\n",
      "frames: 278000, reward: -7.100000, loss: 0.002127, epsilon: 0.010094, episode:  226\n",
      "frames: 279000, reward: -8.000000, loss: 0.006733, epsilon: 0.010091, episode:  227\n",
      "frames: 280000, reward: -8.000000, loss: 0.006843, epsilon: 0.010088, episode:  227\n",
      "frames: 281000, reward: -8.000000, loss: 0.001715, epsilon: 0.010085, episode:  227\n",
      "frames: 282000, reward: -7.300000, loss: 0.001278, epsilon: 0.010082, episode:  228\n",
      "frames: 283000, reward: -7.300000, loss: 0.002145, epsilon: 0.010079, episode:  228\n",
      "frames: 284000, reward: -7.300000, loss: 0.001512, epsilon: 0.010077, episode:  228\n",
      "frames: 285000, reward: -6.400000, loss: 0.003063, epsilon: 0.010074, episode:  229\n",
      "frames: 286000, reward: -6.400000, loss: 0.002186, epsilon: 0.010072, episode:  229\n",
      "frames: 287000, reward: -6.400000, loss: 0.005685, epsilon: 0.010069, episode:  229\n",
      "frames: 288000, reward: -5.400000, loss: 0.001211, epsilon: 0.010067, episode:  230\n",
      "frames: 289000, reward: -5.400000, loss: 0.004052, epsilon: 0.010065, episode:  230\n",
      "frames: 290000, reward: -5.400000, loss: 0.001937, epsilon: 0.010063, episode:  230\n",
      "frames: 291000, reward: -5.400000, loss: 0.005813, epsilon: 0.010061, episode:  230\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 292000, reward: -5.000000, loss: 0.001510, epsilon: 0.010059, episode:  231\n",
      "frames: 293000, reward: -5.000000, loss: 0.003136, epsilon: 0.010057, episode:  231\n",
      "frames: 294000, reward: -5.000000, loss: 0.001707, epsilon: 0.010055, episode:  231\n",
      "frames: 295000, reward: -5.000000, loss: 0.003124, epsilon: 0.010053, episode:  231\n",
      "frames: 296000, reward: -4.900000, loss: 0.002297, epsilon: 0.010051, episode:  232\n",
      "frames: 297000, reward: -4.900000, loss: 0.001620, epsilon: 0.010050, episode:  232\n",
      "frames: 298000, reward: -4.900000, loss: 0.002273, epsilon: 0.010048, episode:  232\n",
      "frames: 299000, reward: -4.900000, loss: 0.002812, epsilon: 0.010046, episode:  232\n",
      "frames: 300000, reward: -4.700000, loss: 0.005490, epsilon: 0.010045, episode:  233\n",
      "frames: 301000, reward: -4.700000, loss: 0.002921, epsilon: 0.010043, episode:  233\n",
      "frames: 302000, reward: -4.700000, loss: 0.001346, epsilon: 0.010042, episode:  233\n",
      "frames: 303000, reward: -4.600000, loss: 0.001394, epsilon: 0.010041, episode:  234\n",
      "frames: 304000, reward: -4.600000, loss: 0.002084, epsilon: 0.010039, episode:  234\n",
      "frames: 305000, reward: -4.600000, loss: 0.000875, epsilon: 0.010038, episode:  234\n",
      "frames: 306000, reward: -4.800000, loss: 0.002666, epsilon: 0.010037, episode:  235\n",
      "frames: 307000, reward: -4.800000, loss: 0.003759, epsilon: 0.010036, episode:  235\n",
      "frames: 308000, reward: -4.800000, loss: 0.001603, epsilon: 0.010034, episode:  235\n",
      "frames: 309000, reward: -4.800000, loss: 0.001865, epsilon: 0.010033, episode:  235\n",
      "frames: 310000, reward: -4.400000, loss: 0.001259, epsilon: 0.010032, episode:  236\n",
      "frames: 311000, reward: -4.400000, loss: 0.001687, epsilon: 0.010031, episode:  236\n",
      "frames: 312000, reward: -4.400000, loss: 0.003999, epsilon: 0.010030, episode:  236\n",
      "frames: 313000, reward: -4.400000, loss: 0.000891, epsilon: 0.010029, episode:  236\n",
      "frames: 314000, reward: -2.400000, loss: 0.002141, epsilon: 0.010028, episode:  237\n",
      "frames: 315000, reward: -2.400000, loss: 0.001597, epsilon: 0.010027, episode:  237\n",
      "frames: 316000, reward: -2.400000, loss: 0.001600, epsilon: 0.010026, episode:  237\n",
      "frames: 317000, reward: -2.300000, loss: 0.004135, epsilon: 0.010026, episode:  238\n",
      "frames: 318000, reward: -2.300000, loss: 0.003918, epsilon: 0.010025, episode:  238\n",
      "frames: 319000, reward: -2.300000, loss: 0.002456, epsilon: 0.010024, episode:  238\n",
      "frames: 320000, reward: -3.100000, loss: 0.003852, epsilon: 0.010023, episode:  239\n",
      "frames: 321000, reward: -3.100000, loss: 0.002234, epsilon: 0.010022, episode:  239\n",
      "frames: 322000, reward: -3.100000, loss: 0.005388, epsilon: 0.010022, episode:  239\n",
      "frames: 323000, reward: -3.600000, loss: 0.006041, epsilon: 0.010021, episode:  240\n",
      "frames: 324000, reward: -3.600000, loss: 0.005457, epsilon: 0.010020, episode:  240\n",
      "frames: 325000, reward: -3.600000, loss: 0.001652, epsilon: 0.010020, episode:  240\n",
      "frames: 326000, reward: -3.600000, loss: 0.000970, epsilon: 0.010019, episode:  240\n",
      "frames: 327000, reward: -3.800000, loss: 0.003957, epsilon: 0.010018, episode:  241\n",
      "frames: 328000, reward: -3.800000, loss: 0.000990, epsilon: 0.010018, episode:  241\n",
      "frames: 329000, reward: -3.800000, loss: 0.002670, epsilon: 0.010017, episode:  241\n",
      "frames: 330000, reward: -3.100000, loss: 0.001791, epsilon: 0.010017, episode:  242\n",
      "frames: 331000, reward: -3.100000, loss: 0.000806, epsilon: 0.010016, episode:  242\n",
      "frames: 332000, reward: -3.100000, loss: 0.001762, epsilon: 0.010015, episode:  242\n",
      "frames: 333000, reward: -3.100000, loss: 0.001669, epsilon: 0.010015, episode:  242\n",
      "frames: 334000, reward: -3.100000, loss: 0.002789, epsilon: 0.010014, episode:  243\n",
      "frames: 335000, reward: -3.100000, loss: 0.001022, epsilon: 0.010014, episode:  243\n",
      "frames: 336000, reward: -3.100000, loss: 0.000926, epsilon: 0.010014, episode:  243\n",
      "frames: 337000, reward: -3.100000, loss: 0.004170, epsilon: 0.010013, episode:  243\n",
      "frames: 338000, reward: -2.300000, loss: 0.006950, epsilon: 0.010013, episode:  244\n",
      "frames: 339000, reward: -2.300000, loss: 0.001071, epsilon: 0.010012, episode:  244\n",
      "frames: 340000, reward: -2.300000, loss: 0.003892, epsilon: 0.010012, episode:  244\n",
      "frames: 341000, reward: -2.300000, loss: 0.001884, epsilon: 0.010011, episode:  244\n",
      "frames: 342000, reward: -1.400000, loss: 0.002121, epsilon: 0.010011, episode:  245\n",
      "frames: 343000, reward: -1.400000, loss: 0.002785, epsilon: 0.010011, episode:  245\n",
      "frames: 344000, reward: -1.400000, loss: 0.001701, epsilon: 0.010010, episode:  245\n",
      "frames: 345000, reward: -1.800000, loss: 0.001623, epsilon: 0.010010, episode:  246\n",
      "frames: 346000, reward: -1.800000, loss: 0.003112, epsilon: 0.010010, episode:  246\n",
      "frames: 347000, reward: -1.800000, loss: 0.006643, epsilon: 0.010009, episode:  246\n",
      "frames: 348000, reward: -1.500000, loss: 0.007562, epsilon: 0.010009, episode:  247\n",
      "frames: 349000, reward: -1.500000, loss: 0.002640, epsilon: 0.010009, episode:  247\n",
      "frames: 350000, reward: -1.500000, loss: 0.002461, epsilon: 0.010008, episode:  247\n",
      "frames: 351000, reward: -0.400000, loss: 0.003042, epsilon: 0.010008, episode:  248\n",
      "frames: 352000, reward: -0.400000, loss: 0.000925, epsilon: 0.010008, episode:  248\n",
      "frames: 353000, reward: -0.400000, loss: 0.001935, epsilon: 0.010008, episode:  248\n",
      "frames: 354000, reward: 1.100000, loss: 0.002546, epsilon: 0.010007, episode:  249\n",
      "frames: 355000, reward: 1.100000, loss: 0.002888, epsilon: 0.010007, episode:  249\n",
      "frames: 356000, reward: 1.100000, loss: 0.001400, epsilon: 0.010007, episode:  249\n",
      "frames: 357000, reward: 2.300000, loss: 0.003752, epsilon: 0.010007, episode:  250\n",
      "frames: 358000, reward: 2.300000, loss: 0.002032, epsilon: 0.010007, episode:  250\n",
      "frames: 359000, reward: 2.300000, loss: 0.001657, epsilon: 0.010006, episode:  250\n",
      "frames: 360000, reward: 2.000000, loss: 0.001240, epsilon: 0.010006, episode:  251\n",
      "frames: 361000, reward: 2.000000, loss: 0.006985, epsilon: 0.010006, episode:  251\n",
      "frames: 362000, reward: 2.000000, loss: 0.002149, epsilon: 0.010006, episode:  251\n",
      "frames: 363000, reward: 1.600000, loss: 0.004036, epsilon: 0.010006, episode:  252\n",
      "frames: 364000, reward: 1.600000, loss: 0.001709, epsilon: 0.010005, episode:  252\n",
      "frames: 365000, reward: 1.600000, loss: 0.004680, epsilon: 0.010005, episode:  252\n",
      "frames: 366000, reward: 1.600000, loss: 0.001068, epsilon: 0.010005, episode:  253\n",
      "frames: 367000, reward: 1.600000, loss: 0.001718, epsilon: 0.010005, episode:  253\n",
      "frames: 368000, reward: 1.600000, loss: 0.002118, epsilon: 0.010005, episode:  253\n",
      "frames: 369000, reward: 1.100000, loss: 0.001756, epsilon: 0.010005, episode:  254\n",
      "frames: 370000, reward: 1.100000, loss: 0.001343, epsilon: 0.010004, episode:  254\n",
      "frames: 371000, reward: 1.100000, loss: 0.009104, epsilon: 0.010004, episode:  254\n",
      "frames: 372000, reward: 1.700000, loss: 0.009835, epsilon: 0.010004, episode:  255\n",
      "frames: 373000, reward: 1.700000, loss: 0.001774, epsilon: 0.010004, episode:  255\n",
      "frames: 374000, reward: 1.700000, loss: 0.003664, epsilon: 0.010004, episode:  255\n",
      "frames: 375000, reward: 1.700000, loss: 0.001574, epsilon: 0.010004, episode:  255\n",
      "frames: 376000, reward: 2.800000, loss: 0.002250, epsilon: 0.010004, episode:  256\n",
      "frames: 377000, reward: 2.800000, loss: 0.003341, epsilon: 0.010003, episode:  256\n",
      "frames: 378000, reward: 2.800000, loss: 0.002784, epsilon: 0.010003, episode:  256\n",
      "frames: 379000, reward: 2.500000, loss: 0.002814, epsilon: 0.010003, episode:  257\n",
      "frames: 380000, reward: 2.500000, loss: 0.001880, epsilon: 0.010003, episode:  257\n",
      "frames: 381000, reward: 2.500000, loss: 0.001596, epsilon: 0.010003, episode:  257\n",
      "frames: 382000, reward: 2.500000, loss: 0.006804, epsilon: 0.010003, episode:  257\n",
      "frames: 383000, reward: 2.300000, loss: 0.001588, epsilon: 0.010003, episode:  258\n",
      "frames: 384000, reward: 2.300000, loss: 0.001792, epsilon: 0.010003, episode:  258\n",
      "frames: 385000, reward: 2.300000, loss: 0.001440, epsilon: 0.010003, episode:  258\n",
      "frames: 386000, reward: 1.100000, loss: 0.002278, epsilon: 0.010003, episode:  259\n",
      "frames: 387000, reward: 1.100000, loss: 0.002343, epsilon: 0.010002, episode:  259\n",
      "frames: 388000, reward: 1.100000, loss: 0.005701, epsilon: 0.010002, episode:  259\n",
      "frames: 389000, reward: 1.100000, loss: 0.001117, epsilon: 0.010002, episode:  259\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 390000, reward: 0.000000, loss: 0.003318, epsilon: 0.010002, episode:  260\n",
      "frames: 391000, reward: 0.000000, loss: 0.002204, epsilon: 0.010002, episode:  260\n",
      "frames: 392000, reward: 0.000000, loss: 0.002704, epsilon: 0.010002, episode:  260\n",
      "frames: 393000, reward: 1.300000, loss: 0.002784, epsilon: 0.010002, episode:  261\n",
      "frames: 394000, reward: 1.300000, loss: 0.002304, epsilon: 0.010002, episode:  261\n",
      "frames: 395000, reward: 1.300000, loss: 0.004415, epsilon: 0.010002, episode:  261\n",
      "frames: 396000, reward: 2.200000, loss: 0.000767, epsilon: 0.010002, episode:  262\n",
      "frames: 397000, reward: 2.200000, loss: 0.002339, epsilon: 0.010002, episode:  262\n",
      "frames: 398000, reward: 2.200000, loss: 0.001350, epsilon: 0.010002, episode:  262\n",
      "frames: 399000, reward: 3.500000, loss: 0.000914, epsilon: 0.010002, episode:  263\n",
      "frames: 400000, reward: 3.500000, loss: 0.001782, epsilon: 0.010002, episode:  263\n",
      "frames: 401000, reward: 3.500000, loss: 0.002356, epsilon: 0.010002, episode:  263\n",
      "frames: 402000, reward: 4.700000, loss: 0.000817, epsilon: 0.010001, episode:  264\n",
      "frames: 403000, reward: 4.700000, loss: 0.001326, epsilon: 0.010001, episode:  264\n",
      "frames: 404000, reward: 4.700000, loss: 0.000743, epsilon: 0.010001, episode:  264\n",
      "frames: 405000, reward: 5.000000, loss: 0.002439, epsilon: 0.010001, episode:  265\n",
      "frames: 406000, reward: 5.000000, loss: 0.001042, epsilon: 0.010001, episode:  265\n",
      "frames: 407000, reward: 5.000000, loss: 0.001472, epsilon: 0.010001, episode:  265\n",
      "frames: 408000, reward: 5.000000, loss: 0.002297, epsilon: 0.010001, episode:  266\n",
      "frames: 409000, reward: 5.000000, loss: 0.003204, epsilon: 0.010001, episode:  266\n",
      "frames: 410000, reward: 5.000000, loss: 0.005486, epsilon: 0.010001, episode:  266\n",
      "frames: 411000, reward: 5.000000, loss: 0.002086, epsilon: 0.010001, episode:  266\n",
      "frames: 412000, reward: 5.200000, loss: 0.001620, epsilon: 0.010001, episode:  267\n",
      "frames: 413000, reward: 5.200000, loss: 0.001623, epsilon: 0.010001, episode:  267\n",
      "frames: 414000, reward: 5.200000, loss: 0.001326, epsilon: 0.010001, episode:  267\n",
      "frames: 415000, reward: 5.800000, loss: 0.002426, epsilon: 0.010001, episode:  268\n",
      "frames: 416000, reward: 5.800000, loss: 0.004091, epsilon: 0.010001, episode:  268\n",
      "frames: 417000, reward: 7.800000, loss: 0.002490, epsilon: 0.010001, episode:  269\n",
      "frames: 418000, reward: 7.800000, loss: 0.001097, epsilon: 0.010001, episode:  269\n",
      "frames: 419000, reward: 7.800000, loss: 0.003134, epsilon: 0.010001, episode:  269\n",
      "frames: 420000, reward: 7.800000, loss: 0.002326, epsilon: 0.010001, episode:  269\n",
      "frames: 421000, reward: 8.400000, loss: 0.001054, epsilon: 0.010001, episode:  270\n",
      "frames: 422000, reward: 8.400000, loss: 0.003016, epsilon: 0.010001, episode:  270\n",
      "frames: 423000, reward: 8.400000, loss: 0.001746, epsilon: 0.010001, episode:  270\n",
      "frames: 424000, reward: 7.400000, loss: 0.001400, epsilon: 0.010001, episode:  271\n",
      "frames: 425000, reward: 7.400000, loss: 0.000679, epsilon: 0.010001, episode:  271\n",
      "frames: 426000, reward: 7.400000, loss: 0.003574, epsilon: 0.010001, episode:  271\n",
      "frames: 427000, reward: 8.100000, loss: 0.008171, epsilon: 0.010001, episode:  272\n",
      "frames: 428000, reward: 8.100000, loss: 0.002107, epsilon: 0.010001, episode:  272\n",
      "frames: 429000, reward: 8.100000, loss: 0.008816, epsilon: 0.010001, episode:  272\n",
      "frames: 430000, reward: 9.000000, loss: 0.001443, epsilon: 0.010001, episode:  273\n",
      "frames: 431000, reward: 9.000000, loss: 0.001513, epsilon: 0.010001, episode:  273\n",
      "frames: 432000, reward: 9.000000, loss: 0.001150, epsilon: 0.010001, episode:  273\n",
      "frames: 433000, reward: 8.600000, loss: 0.001807, epsilon: 0.010001, episode:  274\n",
      "frames: 434000, reward: 8.600000, loss: 0.002865, epsilon: 0.010001, episode:  274\n",
      "frames: 435000, reward: 8.600000, loss: 0.007648, epsilon: 0.010000, episode:  274\n",
      "frames: 436000, reward: 9.400000, loss: 0.000595, epsilon: 0.010000, episode:  275\n",
      "frames: 437000, reward: 9.400000, loss: 0.002061, epsilon: 0.010000, episode:  275\n",
      "frames: 438000, reward: 10.900000, loss: 0.002865, epsilon: 0.010000, episode:  276\n",
      "frames: 439000, reward: 10.900000, loss: 0.001828, epsilon: 0.010000, episode:  276\n",
      "frames: 440000, reward: 11.900000, loss: 0.006667, epsilon: 0.010000, episode:  277\n",
      "frames: 441000, reward: 11.900000, loss: 0.001200, epsilon: 0.010000, episode:  277\n",
      "frames: 442000, reward: 11.900000, loss: 0.000937, epsilon: 0.010000, episode:  277\n",
      "frames: 443000, reward: 11.900000, loss: 0.009501, epsilon: 0.010000, episode:  277\n",
      "frames: 444000, reward: 10.800000, loss: 0.003133, epsilon: 0.010000, episode:  278\n",
      "frames: 445000, reward: 10.800000, loss: 0.009507, epsilon: 0.010000, episode:  278\n",
      "frames: 446000, reward: 10.800000, loss: 0.001605, epsilon: 0.010000, episode:  278\n",
      "frames: 447000, reward: 10.300000, loss: 0.001495, epsilon: 0.010000, episode:  279\n",
      "frames: 448000, reward: 10.300000, loss: 0.003144, epsilon: 0.010000, episode:  279\n",
      "frames: 449000, reward: 10.300000, loss: 0.001517, epsilon: 0.010000, episode:  279\n",
      "frames: 450000, reward: 10.400000, loss: 0.001033, epsilon: 0.010000, episode:  280\n",
      "frames: 451000, reward: 10.400000, loss: 0.003115, epsilon: 0.010000, episode:  280\n",
      "frames: 452000, reward: 12.800000, loss: 0.000892, epsilon: 0.010000, episode:  281\n",
      "frames: 453000, reward: 12.800000, loss: 0.001145, epsilon: 0.010000, episode:  281\n",
      "frames: 454000, reward: 12.800000, loss: 0.001196, epsilon: 0.010000, episode:  281\n",
      "frames: 455000, reward: 12.800000, loss: 0.000796, epsilon: 0.010000, episode:  282\n",
      "frames: 456000, reward: 12.800000, loss: 0.005925, epsilon: 0.010000, episode:  282\n",
      "frames: 457000, reward: 13.100000, loss: 0.002156, epsilon: 0.010000, episode:  283\n",
      "frames: 458000, reward: 13.100000, loss: 0.001038, epsilon: 0.010000, episode:  283\n",
      "frames: 459000, reward: 13.100000, loss: 0.001169, epsilon: 0.010000, episode:  283\n",
      "frames: 460000, reward: 13.500000, loss: 0.000588, epsilon: 0.010000, episode:  284\n",
      "frames: 461000, reward: 13.500000, loss: 0.001407, epsilon: 0.010000, episode:  284\n",
      "frames: 462000, reward: 13.500000, loss: 0.005850, epsilon: 0.010000, episode:  285\n",
      "frames: 463000, reward: 13.500000, loss: 0.003274, epsilon: 0.010000, episode:  285\n",
      "frames: 464000, reward: 13.100000, loss: 0.005792, epsilon: 0.010000, episode:  286\n",
      "frames: 465000, reward: 13.100000, loss: 0.003609, epsilon: 0.010000, episode:  286\n",
      "frames: 466000, reward: 13.300000, loss: 0.003926, epsilon: 0.010000, episode:  287\n",
      "frames: 467000, reward: 13.300000, loss: 0.001016, epsilon: 0.010000, episode:  287\n",
      "frames: 468000, reward: 15.300000, loss: 0.001175, epsilon: 0.010000, episode:  288\n",
      "frames: 469000, reward: 15.300000, loss: 0.002142, epsilon: 0.010000, episode:  288\n",
      "frames: 470000, reward: 15.300000, loss: 0.001335, epsilon: 0.010000, episode:  288\n",
      "frames: 471000, reward: 15.000000, loss: 0.000291, epsilon: 0.010000, episode:  289\n",
      "frames: 472000, reward: 16.300000, loss: 0.001464, epsilon: 0.010000, episode:  290\n",
      "frames: 473000, reward: 16.300000, loss: 0.000509, epsilon: 0.010000, episode:  290\n",
      "frames: 474000, reward: 16.300000, loss: 0.000585, epsilon: 0.010000, episode:  291\n",
      "frames: 475000, reward: 16.300000, loss: 0.001190, epsilon: 0.010000, episode:  291\n",
      "frames: 476000, reward: 16.300000, loss: 0.001344, epsilon: 0.010000, episode:  291\n",
      "frames: 477000, reward: 16.600000, loss: 0.002504, epsilon: 0.010000, episode:  292\n",
      "frames: 478000, reward: 16.600000, loss: 0.001687, epsilon: 0.010000, episode:  292\n",
      "frames: 479000, reward: 16.600000, loss: 0.001037, epsilon: 0.010000, episode:  292\n",
      "frames: 480000, reward: 15.500000, loss: 0.000900, epsilon: 0.010000, episode:  293\n",
      "frames: 481000, reward: 15.500000, loss: 0.001269, epsilon: 0.010000, episode:  293\n",
      "frames: 482000, reward: 16.300000, loss: 0.000809, epsilon: 0.010000, episode:  294\n",
      "frames: 483000, reward: 16.400000, loss: 0.000812, epsilon: 0.010000, episode:  295\n",
      "frames: 484000, reward: 16.400000, loss: 0.001457, epsilon: 0.010000, episode:  295\n",
      "frames: 485000, reward: 16.400000, loss: 0.001775, epsilon: 0.010000, episode:  295\n",
      "frames: 486000, reward: 16.100000, loss: 0.000403, epsilon: 0.010000, episode:  296\n",
      "frames: 487000, reward: 16.100000, loss: 0.000619, epsilon: 0.010000, episode:  296\n",
      "frames: 488000, reward: 16.100000, loss: 0.000777, epsilon: 0.010000, episode:  296\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 489000, reward: 16.200000, loss: 0.000806, epsilon: 0.010000, episode:  297\n",
      "frames: 490000, reward: 16.400000, loss: 0.002952, epsilon: 0.010000, episode:  298\n",
      "frames: 491000, reward: 16.400000, loss: 0.000669, epsilon: 0.010000, episode:  298\n",
      "frames: 492000, reward: 16.400000, loss: 0.002802, epsilon: 0.010000, episode:  298\n",
      "frames: 493000, reward: 16.800000, loss: 0.001012, epsilon: 0.010000, episode:  299\n",
      "frames: 494000, reward: 16.800000, loss: 0.000567, epsilon: 0.010000, episode:  299\n",
      "frames: 495000, reward: 16.500000, loss: 0.000656, epsilon: 0.010000, episode:  300\n",
      "frames: 496000, reward: 16.500000, loss: 0.000457, epsilon: 0.010000, episode:  300\n",
      "frames: 497000, reward: 16.500000, loss: 0.000436, epsilon: 0.010000, episode:  301\n",
      "frames: 498000, reward: 16.700000, loss: 0.001354, epsilon: 0.010000, episode:  302\n",
      "frames: 499000, reward: 16.700000, loss: 0.003930, epsilon: 0.010000, episode:  302\n",
      "frames: 500000, reward: 17.400000, loss: 0.000606, epsilon: 0.010000, episode:  303\n",
      "frames: 501000, reward: 17.400000, loss: 0.001292, epsilon: 0.010000, episode:  303\n",
      "frames: 502000, reward: 17.600000, loss: 0.003663, epsilon: 0.010000, episode:  304\n",
      "frames: 503000, reward: 17.600000, loss: 0.000479, epsilon: 0.010000, episode:  304\n",
      "frames: 504000, reward: 17.500000, loss: 0.001156, epsilon: 0.010000, episode:  305\n",
      "frames: 505000, reward: 17.500000, loss: 0.000749, epsilon: 0.010000, episode:  305\n",
      "frames: 506000, reward: 18.100000, loss: 0.000460, epsilon: 0.010000, episode:  306\n",
      "frames: 507000, reward: 18.100000, loss: 0.000325, epsilon: 0.010000, episode:  306\n",
      "frames: 508000, reward: 17.800000, loss: 0.002226, epsilon: 0.010000, episode:  307\n",
      "frames: 509000, reward: 17.800000, loss: 0.001073, epsilon: 0.010000, episode:  307\n",
      "frames: 510000, reward: 17.800000, loss: 0.000865, epsilon: 0.010000, episode:  308\n",
      "frames: 511000, reward: 17.800000, loss: 0.002194, epsilon: 0.010000, episode:  308\n",
      "frames: 512000, reward: 18.200000, loss: 0.002173, epsilon: 0.010000, episode:  309\n",
      "frames: 513000, reward: 18.200000, loss: 0.000676, epsilon: 0.010000, episode:  309\n",
      "frames: 514000, reward: 18.200000, loss: 0.000878, epsilon: 0.010000, episode:  310\n",
      "frames: 515000, reward: 18.200000, loss: 0.000820, epsilon: 0.010000, episode:  310\n",
      "frames: 516000, reward: 18.000000, loss: 0.001243, epsilon: 0.010000, episode:  311\n",
      "frames: 517000, reward: 18.000000, loss: 0.000555, epsilon: 0.010000, episode:  311\n",
      "frames: 518000, reward: 17.700000, loss: 0.000658, epsilon: 0.010000, episode:  312\n",
      "frames: 519000, reward: 17.700000, loss: 0.002103, epsilon: 0.010000, episode:  312\n",
      "frames: 520000, reward: 18.000000, loss: 0.000666, epsilon: 0.010000, episode:  313\n",
      "frames: 521000, reward: 18.000000, loss: 0.002601, epsilon: 0.010000, episode:  313\n",
      "frames: 522000, reward: 17.700000, loss: 0.001168, epsilon: 0.010000, episode:  314\n",
      "frames: 523000, reward: 17.700000, loss: 0.000574, epsilon: 0.010000, episode:  314\n",
      "frames: 524000, reward: 17.800000, loss: 0.000721, epsilon: 0.010000, episode:  315\n",
      "frames: 525000, reward: 17.800000, loss: 0.000427, epsilon: 0.010000, episode:  315\n",
      "frames: 526000, reward: 17.800000, loss: 0.002804, epsilon: 0.010000, episode:  315\n",
      "frames: 527000, reward: 17.500000, loss: 0.000395, epsilon: 0.010000, episode:  316\n",
      "frames: 528000, reward: 17.500000, loss: 0.001509, epsilon: 0.010000, episode:  316\n",
      "frames: 529000, reward: 18.000000, loss: 0.000435, epsilon: 0.010000, episode:  317\n",
      "frames: 530000, reward: 18.000000, loss: 0.001473, epsilon: 0.010000, episode:  318\n",
      "frames: 531000, reward: 18.000000, loss: 0.000608, epsilon: 0.010000, episode:  318\n",
      "frames: 532000, reward: 18.000000, loss: 0.000511, epsilon: 0.010000, episode:  318\n",
      "frames: 533000, reward: 17.700000, loss: 0.000536, epsilon: 0.010000, episode:  319\n",
      "frames: 534000, reward: 17.700000, loss: 0.000297, epsilon: 0.010000, episode:  319\n",
      "frames: 535000, reward: 17.900000, loss: 0.000441, epsilon: 0.010000, episode:  320\n",
      "frames: 536000, reward: 17.900000, loss: 0.000579, epsilon: 0.010000, episode:  320\n",
      "frames: 537000, reward: 17.900000, loss: 0.002134, epsilon: 0.010000, episode:  320\n",
      "frames: 538000, reward: 17.400000, loss: 0.000899, epsilon: 0.010000, episode:  321\n",
      "frames: 539000, reward: 17.600000, loss: 0.002981, epsilon: 0.010000, episode:  322\n",
      "frames: 540000, reward: 17.600000, loss: 0.010237, epsilon: 0.010000, episode:  322\n",
      "frames: 541000, reward: 17.600000, loss: 0.000965, epsilon: 0.010000, episode:  323\n",
      "frames: 542000, reward: 17.600000, loss: 0.000754, epsilon: 0.010000, episode:  323\n",
      "frames: 543000, reward: 17.700000, loss: 0.000589, epsilon: 0.010000, episode:  324\n",
      "frames: 544000, reward: 17.700000, loss: 0.001106, epsilon: 0.010000, episode:  324\n",
      "frames: 545000, reward: 17.200000, loss: 0.000859, epsilon: 0.010000, episode:  325\n",
      "frames: 546000, reward: 17.200000, loss: 0.001624, epsilon: 0.010000, episode:  325\n",
      "frames: 547000, reward: 17.200000, loss: 0.000902, epsilon: 0.010000, episode:  325\n",
      "frames: 548000, reward: 17.200000, loss: 0.001301, epsilon: 0.010000, episode:  326\n",
      "frames: 549000, reward: 17.200000, loss: 0.000875, epsilon: 0.010000, episode:  326\n",
      "frames: 550000, reward: 17.200000, loss: 0.000779, epsilon: 0.010000, episode:  326\n",
      "frames: 551000, reward: 16.500000, loss: 0.000254, epsilon: 0.010000, episode:  327\n",
      "frames: 552000, reward: 16.400000, loss: 0.000843, epsilon: 0.010000, episode:  328\n",
      "frames: 553000, reward: 16.400000, loss: 0.000386, epsilon: 0.010000, episode:  328\n",
      "frames: 554000, reward: 16.400000, loss: 0.001152, epsilon: 0.010000, episode:  328\n",
      "frames: 555000, reward: 16.300000, loss: 0.000422, epsilon: 0.010000, episode:  329\n",
      "frames: 556000, reward: 16.300000, loss: 0.000405, epsilon: 0.010000, episode:  329\n",
      "frames: 557000, reward: 16.500000, loss: 0.000398, epsilon: 0.010000, episode:  330\n",
      "frames: 558000, reward: 16.500000, loss: 0.000507, epsilon: 0.010000, episode:  330\n",
      "frames: 559000, reward: 16.500000, loss: 0.000551, epsilon: 0.010000, episode:  330\n",
      "frames: 560000, reward: 16.900000, loss: 0.000306, epsilon: 0.010000, episode:  331\n",
      "frames: 561000, reward: 16.900000, loss: 0.000314, epsilon: 0.010000, episode:  331\n",
      "frames: 562000, reward: 16.000000, loss: 0.001111, epsilon: 0.010000, episode:  332\n",
      "frames: 563000, reward: 16.000000, loss: 0.000952, epsilon: 0.010000, episode:  332\n",
      "frames: 564000, reward: 16.000000, loss: 0.001758, epsilon: 0.010000, episode:  332\n",
      "frames: 565000, reward: 16.000000, loss: 0.000618, epsilon: 0.010000, episode:  333\n",
      "frames: 566000, reward: 16.000000, loss: 0.000310, epsilon: 0.010000, episode:  334\n",
      "frames: 567000, reward: 16.000000, loss: 0.001278, epsilon: 0.010000, episode:  334\n",
      "frames: 568000, reward: 16.000000, loss: 0.000346, epsilon: 0.010000, episode:  334\n",
      "frames: 569000, reward: 16.000000, loss: 0.000461, epsilon: 0.010000, episode:  334\n",
      "frames: 570000, reward: 15.200000, loss: 0.000463, epsilon: 0.010000, episode:  335\n",
      "frames: 571000, reward: 15.200000, loss: 0.000569, epsilon: 0.010000, episode:  335\n",
      "frames: 572000, reward: 15.200000, loss: 0.000884, epsilon: 0.010000, episode:  335\n",
      "frames: 573000, reward: 14.500000, loss: 0.001148, epsilon: 0.010000, episode:  336\n",
      "frames: 574000, reward: 14.500000, loss: 0.000404, epsilon: 0.010000, episode:  336\n",
      "frames: 575000, reward: 14.600000, loss: 0.000339, epsilon: 0.010000, episode:  337\n",
      "frames: 576000, reward: 14.600000, loss: 0.000632, epsilon: 0.010000, episode:  337\n",
      "frames: 577000, reward: 14.100000, loss: 0.001120, epsilon: 0.010000, episode:  338\n",
      "frames: 578000, reward: 14.100000, loss: 0.000833, epsilon: 0.010000, episode:  338\n",
      "frames: 579000, reward: 14.100000, loss: 0.000776, epsilon: 0.010000, episode:  338\n",
      "frames: 580000, reward: 14.100000, loss: 0.000432, epsilon: 0.010000, episode:  338\n",
      "frames: 581000, reward: 13.600000, loss: 0.000546, epsilon: 0.010000, episode:  339\n",
      "frames: 582000, reward: 13.600000, loss: 0.001689, epsilon: 0.010000, episode:  339\n",
      "frames: 583000, reward: 13.600000, loss: 0.000733, epsilon: 0.010000, episode:  339\n",
      "frames: 584000, reward: 12.700000, loss: 0.000433, epsilon: 0.010000, episode:  340\n",
      "frames: 585000, reward: 12.700000, loss: 0.000689, epsilon: 0.010000, episode:  340\n",
      "frames: 586000, reward: 12.400000, loss: 0.001163, epsilon: 0.010000, episode:  341\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 587000, reward: 12.400000, loss: 0.000598, epsilon: 0.010000, episode:  341\n",
      "frames: 588000, reward: 12.400000, loss: 0.000227, epsilon: 0.010000, episode:  341\n",
      "frames: 589000, reward: 13.200000, loss: 0.000818, epsilon: 0.010000, episode:  342\n",
      "frames: 590000, reward: 13.200000, loss: 0.000805, epsilon: 0.010000, episode:  342\n",
      "frames: 591000, reward: 13.100000, loss: 0.001194, epsilon: 0.010000, episode:  343\n",
      "frames: 592000, reward: 13.000000, loss: 0.000661, epsilon: 0.010000, episode:  344\n",
      "frames: 593000, reward: 13.000000, loss: 0.002571, epsilon: 0.010000, episode:  344\n",
      "frames: 594000, reward: 13.000000, loss: 0.000640, epsilon: 0.010000, episode:  344\n",
      "frames: 595000, reward: 14.200000, loss: 0.001762, epsilon: 0.010000, episode:  345\n",
      "frames: 596000, reward: 14.200000, loss: 0.000350, epsilon: 0.010000, episode:  345\n",
      "frames: 597000, reward: 15.300000, loss: 0.000819, epsilon: 0.010000, episode:  346\n",
      "frames: 598000, reward: 15.300000, loss: 0.000557, epsilon: 0.010000, episode:  346\n",
      "frames: 599000, reward: 15.800000, loss: 0.000362, epsilon: 0.010000, episode:  347\n",
      "frames: 600000, reward: 15.800000, loss: 0.000473, epsilon: 0.010000, episode:  347\n",
      "frames: 601000, reward: 16.400000, loss: 0.000236, epsilon: 0.010000, episode:  348\n",
      "frames: 602000, reward: 16.400000, loss: 0.002947, epsilon: 0.010000, episode:  348\n",
      "frames: 603000, reward: 17.300000, loss: 0.000522, epsilon: 0.010000, episode:  349\n",
      "frames: 604000, reward: 17.300000, loss: 0.000675, epsilon: 0.010000, episode:  349\n",
      "frames: 605000, reward: 17.300000, loss: 0.000529, epsilon: 0.010000, episode:  349\n",
      "frames: 606000, reward: 17.900000, loss: 0.004416, epsilon: 0.010000, episode:  350\n",
      "frames: 607000, reward: 17.900000, loss: 0.001101, epsilon: 0.010000, episode:  350\n",
      "frames: 608000, reward: 18.200000, loss: 0.000903, epsilon: 0.010000, episode:  351\n",
      "frames: 609000, reward: 18.200000, loss: 0.000274, epsilon: 0.010000, episode:  351\n",
      "frames: 610000, reward: 18.200000, loss: 0.000924, epsilon: 0.010000, episode:  351\n",
      "frames: 611000, reward: 18.200000, loss: 0.000614, epsilon: 0.010000, episode:  352\n",
      "frames: 612000, reward: 18.200000, loss: 0.000380, epsilon: 0.010000, episode:  352\n",
      "frames: 613000, reward: 18.500000, loss: 0.000467, epsilon: 0.010000, episode:  353\n",
      "frames: 614000, reward: 18.500000, loss: 0.000216, epsilon: 0.010000, episode:  353\n",
      "frames: 615000, reward: 18.600000, loss: 0.000849, epsilon: 0.010000, episode:  354\n",
      "frames: 616000, reward: 18.800000, loss: 0.001108, epsilon: 0.010000, episode:  355\n",
      "frames: 617000, reward: 18.800000, loss: 0.000487, epsilon: 0.010000, episode:  355\n",
      "frames: 618000, reward: 18.900000, loss: 0.000127, epsilon: 0.010000, episode:  356\n",
      "frames: 619000, reward: 18.900000, loss: 0.000348, epsilon: 0.010000, episode:  356\n",
      "frames: 620000, reward: 18.400000, loss: 0.000295, epsilon: 0.010000, episode:  357\n",
      "frames: 621000, reward: 18.400000, loss: 0.000452, epsilon: 0.010000, episode:  357\n",
      "frames: 622000, reward: 18.400000, loss: 0.000377, epsilon: 0.010000, episode:  357\n",
      "frames: 623000, reward: 17.900000, loss: 0.000585, epsilon: 0.010000, episode:  358\n",
      "frames: 624000, reward: 17.900000, loss: 0.000595, epsilon: 0.010000, episode:  358\n",
      "frames: 625000, reward: 17.900000, loss: 0.000428, epsilon: 0.010000, episode:  358\n",
      "frames: 626000, reward: 17.700000, loss: 0.000442, epsilon: 0.010000, episode:  359\n",
      "frames: 627000, reward: 18.100000, loss: 0.000372, epsilon: 0.010000, episode:  360\n",
      "frames: 628000, reward: 18.100000, loss: 0.000568, epsilon: 0.010000, episode:  360\n",
      "frames: 629000, reward: 18.500000, loss: 0.001162, epsilon: 0.010000, episode:  361\n",
      "frames: 630000, reward: 18.500000, loss: 0.000489, epsilon: 0.010000, episode:  361\n",
      "frames: 631000, reward: 18.600000, loss: 0.000981, epsilon: 0.010000, episode:  362\n",
      "frames: 632000, reward: 18.600000, loss: 0.000351, epsilon: 0.010000, episode:  362\n",
      "frames: 633000, reward: 18.400000, loss: 0.000384, epsilon: 0.010000, episode:  363\n",
      "frames: 634000, reward: 18.400000, loss: 0.000604, epsilon: 0.010000, episode:  363\n",
      "frames: 635000, reward: 18.400000, loss: 0.000323, epsilon: 0.010000, episode:  364\n",
      "frames: 636000, reward: 18.300000, loss: 0.001019, epsilon: 0.010000, episode:  365\n",
      "frames: 637000, reward: 18.300000, loss: 0.002433, epsilon: 0.010000, episode:  365\n",
      "frames: 638000, reward: 18.300000, loss: 0.000612, epsilon: 0.010000, episode:  365\n",
      "frames: 639000, reward: 17.800000, loss: 0.000483, epsilon: 0.010000, episode:  366\n",
      "frames: 640000, reward: 18.300000, loss: 0.000925, epsilon: 0.010000, episode:  367\n",
      "frames: 641000, reward: 18.300000, loss: 0.000409, epsilon: 0.010000, episode:  367\n",
      "frames: 642000, reward: 18.600000, loss: 0.004784, epsilon: 0.010000, episode:  368\n",
      "frames: 643000, reward: 18.600000, loss: 0.000564, epsilon: 0.010000, episode:  368\n",
      "frames: 644000, reward: 18.600000, loss: 0.000496, epsilon: 0.010000, episode:  368\n",
      "frames: 645000, reward: 18.300000, loss: 0.001703, epsilon: 0.010000, episode:  369\n",
      "frames: 646000, reward: 18.200000, loss: 0.001322, epsilon: 0.010000, episode:  370\n",
      "frames: 647000, reward: 18.200000, loss: 0.001076, epsilon: 0.010000, episode:  370\n",
      "frames: 648000, reward: 18.100000, loss: 0.000522, epsilon: 0.010000, episode:  371\n",
      "frames: 649000, reward: 18.100000, loss: 0.000997, epsilon: 0.010000, episode:  371\n",
      "frames: 650000, reward: 18.300000, loss: 0.000413, epsilon: 0.010000, episode:  372\n",
      "frames: 651000, reward: 18.300000, loss: 0.000530, epsilon: 0.010000, episode:  372\n",
      "frames: 652000, reward: 18.300000, loss: 0.000468, epsilon: 0.010000, episode:  373\n",
      "frames: 653000, reward: 18.300000, loss: 0.001187, epsilon: 0.010000, episode:  373\n",
      "frames: 654000, reward: 18.300000, loss: 0.000389, epsilon: 0.010000, episode:  374\n",
      "frames: 655000, reward: 18.300000, loss: 0.001784, epsilon: 0.010000, episode:  374\n",
      "frames: 656000, reward: 18.500000, loss: 0.000583, epsilon: 0.010000, episode:  375\n",
      "frames: 657000, reward: 18.500000, loss: 0.000787, epsilon: 0.010000, episode:  375\n",
      "frames: 658000, reward: 18.500000, loss: 0.000858, epsilon: 0.010000, episode:  375\n",
      "frames: 659000, reward: 17.700000, loss: 0.002933, epsilon: 0.010000, episode:  376\n",
      "frames: 660000, reward: 17.700000, loss: 0.000922, epsilon: 0.010000, episode:  376\n",
      "frames: 661000, reward: 17.800000, loss: 0.000687, epsilon: 0.010000, episode:  377\n",
      "frames: 662000, reward: 17.800000, loss: 0.001302, epsilon: 0.010000, episode:  377\n",
      "frames: 663000, reward: 18.100000, loss: 0.000322, epsilon: 0.010000, episode:  378\n",
      "frames: 664000, reward: 18.100000, loss: 0.000526, epsilon: 0.010000, episode:  378\n",
      "frames: 665000, reward: 18.500000, loss: 0.000873, epsilon: 0.010000, episode:  379\n",
      "frames: 666000, reward: 18.500000, loss: 0.000276, epsilon: 0.010000, episode:  379\n",
      "frames: 667000, reward: 18.700000, loss: 0.001584, epsilon: 0.010000, episode:  380\n",
      "frames: 668000, reward: 18.700000, loss: 0.000829, epsilon: 0.010000, episode:  380\n",
      "frames: 669000, reward: 18.300000, loss: 0.001241, epsilon: 0.010000, episode:  381\n",
      "frames: 670000, reward: 18.300000, loss: 0.000290, epsilon: 0.010000, episode:  381\n",
      "frames: 671000, reward: 18.300000, loss: 0.000757, epsilon: 0.010000, episode:  382\n",
      "frames: 672000, reward: 18.300000, loss: 0.000301, epsilon: 0.010000, episode:  382\n",
      "frames: 673000, reward: 18.300000, loss: 0.000322, epsilon: 0.010000, episode:  382\n",
      "frames: 674000, reward: 17.900000, loss: 0.000470, epsilon: 0.010000, episode:  383\n",
      "frames: 675000, reward: 17.900000, loss: 0.000910, epsilon: 0.010000, episode:  383\n",
      "frames: 676000, reward: 17.700000, loss: 0.001903, epsilon: 0.010000, episode:  384\n",
      "frames: 677000, reward: 17.600000, loss: 0.000401, epsilon: 0.010000, episode:  385\n",
      "frames: 678000, reward: 17.600000, loss: 0.000958, epsilon: 0.010000, episode:  385\n",
      "frames: 679000, reward: 17.600000, loss: 0.001223, epsilon: 0.010000, episode:  385\n",
      "frames: 680000, reward: 17.600000, loss: 0.000177, epsilon: 0.010000, episode:  385\n",
      "frames: 681000, reward: 17.700000, loss: 0.000969, epsilon: 0.010000, episode:  386\n",
      "frames: 682000, reward: 17.700000, loss: 0.001237, epsilon: 0.010000, episode:  386\n",
      "frames: 683000, reward: 17.600000, loss: 0.000426, epsilon: 0.010000, episode:  387\n",
      "frames: 684000, reward: 17.600000, loss: 0.000483, epsilon: 0.010000, episode:  387\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 685000, reward: 17.600000, loss: 0.000786, epsilon: 0.010000, episode:  387\n",
      "frames: 686000, reward: 16.900000, loss: 0.000333, epsilon: 0.010000, episode:  388\n",
      "frames: 687000, reward: 16.900000, loss: 0.000835, epsilon: 0.010000, episode:  388\n",
      "frames: 688000, reward: 16.900000, loss: 0.001384, epsilon: 0.010000, episode:  389\n",
      "frames: 689000, reward: 16.900000, loss: 0.000644, epsilon: 0.010000, episode:  389\n",
      "frames: 690000, reward: 16.500000, loss: 0.000648, epsilon: 0.010000, episode:  390\n",
      "frames: 691000, reward: 16.500000, loss: 0.000646, epsilon: 0.010000, episode:  390\n",
      "frames: 692000, reward: 16.900000, loss: 0.000633, epsilon: 0.010000, episode:  391\n",
      "frames: 693000, reward: 16.900000, loss: 0.000218, epsilon: 0.010000, episode:  391\n",
      "frames: 694000, reward: 16.700000, loss: 0.001560, epsilon: 0.010000, episode:  392\n",
      "frames: 695000, reward: 16.700000, loss: 0.000385, epsilon: 0.010000, episode:  392\n",
      "frames: 696000, reward: 16.700000, loss: 0.000223, epsilon: 0.010000, episode:  392\n",
      "frames: 697000, reward: 16.900000, loss: 0.000807, epsilon: 0.010000, episode:  393\n",
      "frames: 698000, reward: 16.900000, loss: 0.000401, epsilon: 0.010000, episode:  393\n",
      "frames: 699000, reward: 17.000000, loss: 0.000809, epsilon: 0.010000, episode:  394\n",
      "frames: 700000, reward: 17.000000, loss: 0.000551, epsilon: 0.010000, episode:  394\n",
      "frames: 701000, reward: 16.900000, loss: 0.000297, epsilon: 0.010000, episode:  395\n",
      "frames: 702000, reward: 16.900000, loss: 0.001045, epsilon: 0.010000, episode:  395\n",
      "frames: 703000, reward: 17.500000, loss: 0.003377, epsilon: 0.010000, episode:  396\n",
      "frames: 704000, reward: 17.500000, loss: 0.000675, epsilon: 0.010000, episode:  396\n",
      "frames: 705000, reward: 17.400000, loss: 0.000326, epsilon: 0.010000, episode:  397\n",
      "frames: 706000, reward: 17.400000, loss: 0.000489, epsilon: 0.010000, episode:  397\n",
      "frames: 707000, reward: 18.100000, loss: 0.001358, epsilon: 0.010000, episode:  398\n",
      "frames: 708000, reward: 18.100000, loss: 0.000591, epsilon: 0.010000, episode:  398\n",
      "frames: 709000, reward: 18.400000, loss: 0.000143, epsilon: 0.010000, episode:  399\n",
      "frames: 710000, reward: 18.400000, loss: 0.000648, epsilon: 0.010000, episode:  399\n",
      "frames: 711000, reward: 18.400000, loss: 0.000670, epsilon: 0.010000, episode:  399\n",
      "frames: 712000, reward: 18.400000, loss: 0.000397, epsilon: 0.010000, episode:  399\n",
      "frames: 713000, reward: 18.400000, loss: 0.000222, epsilon: 0.010000, episode:  399\n",
      "frames: 714000, reward: 17.800000, loss: 0.000757, epsilon: 0.010000, episode:  400\n",
      "frames: 715000, reward: 17.800000, loss: 0.000510, epsilon: 0.010000, episode:  400\n",
      "frames: 716000, reward: 17.700000, loss: 0.000450, epsilon: 0.010000, episode:  401\n",
      "frames: 717000, reward: 17.700000, loss: 0.001548, epsilon: 0.010000, episode:  401\n",
      "frames: 718000, reward: 17.700000, loss: 0.000622, epsilon: 0.010000, episode:  402\n",
      "frames: 719000, reward: 17.700000, loss: 0.004172, epsilon: 0.010000, episode:  402\n",
      "frames: 720000, reward: 17.700000, loss: 0.000283, epsilon: 0.010000, episode:  402\n",
      "frames: 721000, reward: 17.700000, loss: 0.000409, epsilon: 0.010000, episode:  403\n",
      "frames: 722000, reward: 17.700000, loss: 0.000317, epsilon: 0.010000, episode:  403\n",
      "frames: 723000, reward: 17.700000, loss: 0.002201, epsilon: 0.010000, episode:  403\n",
      "frames: 724000, reward: 17.700000, loss: 0.001608, epsilon: 0.010000, episode:  403\n",
      "frames: 725000, reward: 16.300000, loss: 0.002935, epsilon: 0.010000, episode:  404\n",
      "frames: 726000, reward: 16.300000, loss: 0.000559, epsilon: 0.010000, episode:  404\n",
      "frames: 727000, reward: 16.400000, loss: 0.000630, epsilon: 0.010000, episode:  405\n",
      "frames: 728000, reward: 16.400000, loss: 0.000678, epsilon: 0.010000, episode:  405\n",
      "frames: 729000, reward: 17.000000, loss: 0.000359, epsilon: 0.010000, episode:  406\n",
      "frames: 730000, reward: 17.000000, loss: 0.001584, epsilon: 0.010000, episode:  406\n",
      "frames: 731000, reward: 17.100000, loss: 0.000962, epsilon: 0.010000, episode:  407\n",
      "frames: 732000, reward: 17.100000, loss: 0.002236, epsilon: 0.010000, episode:  407\n",
      "frames: 733000, reward: 17.100000, loss: 0.000698, epsilon: 0.010000, episode:  407\n",
      "frames: 734000, reward: 15.900000, loss: 0.001189, epsilon: 0.010000, episode:  408\n",
      "frames: 735000, reward: 15.800000, loss: 0.000689, epsilon: 0.010000, episode:  409\n",
      "frames: 736000, reward: 15.800000, loss: 0.000358, epsilon: 0.010000, episode:  409\n",
      "frames: 737000, reward: 16.700000, loss: 0.000412, epsilon: 0.010000, episode:  410\n",
      "frames: 738000, reward: 16.700000, loss: 0.000206, epsilon: 0.010000, episode:  410\n",
      "frames: 739000, reward: 16.800000, loss: 0.000836, epsilon: 0.010000, episode:  411\n",
      "frames: 740000, reward: 16.800000, loss: 0.001592, epsilon: 0.010000, episode:  411\n",
      "frames: 741000, reward: 16.800000, loss: 0.002099, epsilon: 0.010000, episode:  411\n",
      "frames: 742000, reward: 16.600000, loss: 0.000341, epsilon: 0.010000, episode:  412\n",
      "frames: 743000, reward: 16.600000, loss: 0.000374, epsilon: 0.010000, episode:  412\n",
      "frames: 744000, reward: 16.600000, loss: 0.004336, epsilon: 0.010000, episode:  412\n",
      "frames: 745000, reward: 16.400000, loss: 0.000425, epsilon: 0.010000, episode:  413\n",
      "frames: 746000, reward: 16.400000, loss: 0.000614, epsilon: 0.010000, episode:  413\n",
      "frames: 747000, reward: 17.700000, loss: 0.003946, epsilon: 0.010000, episode:  414\n",
      "frames: 748000, reward: 17.700000, loss: 0.000685, epsilon: 0.010000, episode:  414\n",
      "frames: 749000, reward: 17.700000, loss: 0.000952, epsilon: 0.010000, episode:  414\n",
      "frames: 750000, reward: 17.400000, loss: 0.000475, epsilon: 0.010000, episode:  415\n",
      "frames: 751000, reward: 17.400000, loss: 0.001040, epsilon: 0.010000, episode:  416\n",
      "frames: 752000, reward: 17.400000, loss: 0.001218, epsilon: 0.010000, episode:  416\n",
      "frames: 753000, reward: 17.400000, loss: 0.000410, epsilon: 0.010000, episode:  416\n",
      "frames: 754000, reward: 17.100000, loss: 0.000792, epsilon: 0.010000, episode:  417\n",
      "frames: 755000, reward: 17.100000, loss: 0.000646, epsilon: 0.010000, episode:  417\n",
      "frames: 756000, reward: 18.100000, loss: 0.000210, epsilon: 0.010000, episode:  418\n",
      "frames: 757000, reward: 18.100000, loss: 0.000705, epsilon: 0.010000, episode:  418\n",
      "frames: 758000, reward: 17.900000, loss: 0.001069, epsilon: 0.010000, episode:  419\n",
      "frames: 759000, reward: 17.900000, loss: 0.000776, epsilon: 0.010000, episode:  419\n",
      "frames: 760000, reward: 17.900000, loss: 0.000743, epsilon: 0.010000, episode:  419\n",
      "frames: 761000, reward: 17.200000, loss: 0.000795, epsilon: 0.010000, episode:  420\n",
      "frames: 762000, reward: 17.200000, loss: 0.001245, epsilon: 0.010000, episode:  420\n",
      "frames: 763000, reward: 17.200000, loss: 0.000388, epsilon: 0.010000, episode:  420\n",
      "frames: 764000, reward: 16.700000, loss: 0.001907, epsilon: 0.010000, episode:  421\n",
      "frames: 765000, reward: 16.700000, loss: 0.000392, epsilon: 0.010000, episode:  421\n",
      "frames: 766000, reward: 16.900000, loss: 0.000595, epsilon: 0.010000, episode:  422\n",
      "frames: 767000, reward: 16.900000, loss: 0.000761, epsilon: 0.010000, episode:  422\n",
      "frames: 768000, reward: 17.400000, loss: 0.000399, epsilon: 0.010000, episode:  423\n",
      "frames: 769000, reward: 17.400000, loss: 0.000523, epsilon: 0.010000, episode:  423\n",
      "frames: 770000, reward: 17.500000, loss: 0.000554, epsilon: 0.010000, episode:  424\n",
      "frames: 771000, reward: 17.500000, loss: 0.001334, epsilon: 0.010000, episode:  424\n",
      "frames: 772000, reward: 17.800000, loss: 0.000606, epsilon: 0.010000, episode:  425\n",
      "frames: 773000, reward: 17.800000, loss: 0.000946, epsilon: 0.010000, episode:  425\n",
      "frames: 774000, reward: 17.600000, loss: 0.000494, epsilon: 0.010000, episode:  426\n",
      "frames: 775000, reward: 17.600000, loss: 0.000362, epsilon: 0.010000, episode:  426\n",
      "frames: 776000, reward: 17.800000, loss: 0.001817, epsilon: 0.010000, episode:  427\n",
      "frames: 777000, reward: 17.800000, loss: 0.000462, epsilon: 0.010000, episode:  427\n",
      "frames: 778000, reward: 18.000000, loss: 0.000549, epsilon: 0.010000, episode:  428\n",
      "frames: 779000, reward: 18.000000, loss: 0.001582, epsilon: 0.010000, episode:  428\n",
      "frames: 780000, reward: 18.000000, loss: 0.000369, epsilon: 0.010000, episode:  428\n",
      "frames: 781000, reward: 17.000000, loss: 0.000770, epsilon: 0.010000, episode:  429\n",
      "frames: 782000, reward: 17.000000, loss: 0.000376, epsilon: 0.010000, episode:  429\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 783000, reward: 17.000000, loss: 0.000882, epsilon: 0.010000, episode:  430\n",
      "frames: 784000, reward: 17.000000, loss: 0.000531, epsilon: 0.010000, episode:  430\n",
      "frames: 785000, reward: 17.000000, loss: 0.001317, epsilon: 0.010000, episode:  430\n",
      "frames: 786000, reward: 17.400000, loss: 0.001411, epsilon: 0.010000, episode:  431\n",
      "frames: 787000, reward: 17.400000, loss: 0.001159, epsilon: 0.010000, episode:  431\n",
      "frames: 788000, reward: 17.700000, loss: 0.000461, epsilon: 0.010000, episode:  432\n",
      "frames: 789000, reward: 17.700000, loss: 0.000536, epsilon: 0.010000, episode:  432\n",
      "frames: 790000, reward: 17.700000, loss: 0.000350, epsilon: 0.010000, episode:  433\n",
      "frames: 791000, reward: 17.700000, loss: 0.000567, epsilon: 0.010000, episode:  433\n",
      "frames: 792000, reward: 17.600000, loss: 0.000698, epsilon: 0.010000, episode:  434\n",
      "frames: 793000, reward: 17.600000, loss: 0.000472, epsilon: 0.010000, episode:  434\n",
      "frames: 794000, reward: 17.600000, loss: 0.000565, epsilon: 0.010000, episode:  435\n",
      "frames: 795000, reward: 17.600000, loss: 0.001038, epsilon: 0.010000, episode:  435\n",
      "frames: 796000, reward: 17.600000, loss: 0.000633, epsilon: 0.010000, episode:  435\n",
      "frames: 797000, reward: 17.700000, loss: 0.000315, epsilon: 0.010000, episode:  436\n",
      "frames: 798000, reward: 17.700000, loss: 0.000481, epsilon: 0.010000, episode:  436\n",
      "frames: 799000, reward: 17.700000, loss: 0.000589, epsilon: 0.010000, episode:  437\n",
      "frames: 800000, reward: 17.700000, loss: 0.000319, epsilon: 0.010000, episode:  437\n",
      "frames: 801000, reward: 17.700000, loss: 0.000606, epsilon: 0.010000, episode:  437\n",
      "frames: 802000, reward: 17.300000, loss: 0.001409, epsilon: 0.010000, episode:  438\n",
      "frames: 803000, reward: 17.300000, loss: 0.000264, epsilon: 0.010000, episode:  438\n",
      "frames: 804000, reward: 17.300000, loss: 0.001869, epsilon: 0.010000, episode:  438\n",
      "frames: 805000, reward: 17.000000, loss: 0.000318, epsilon: 0.010000, episode:  439\n",
      "frames: 806000, reward: 17.000000, loss: 0.002045, epsilon: 0.010000, episode:  439\n",
      "frames: 807000, reward: 17.000000, loss: 0.000661, epsilon: 0.010000, episode:  439\n",
      "frames: 808000, reward: 16.800000, loss: 0.000390, epsilon: 0.010000, episode:  440\n",
      "frames: 809000, reward: 16.800000, loss: 0.000582, epsilon: 0.010000, episode:  440\n",
      "frames: 810000, reward: 16.700000, loss: 0.001157, epsilon: 0.010000, episode:  441\n",
      "frames: 811000, reward: 16.700000, loss: 0.000566, epsilon: 0.010000, episode:  441\n",
      "frames: 812000, reward: 16.700000, loss: 0.000217, epsilon: 0.010000, episode:  441\n",
      "frames: 813000, reward: 16.700000, loss: 0.002447, epsilon: 0.010000, episode:  441\n",
      "frames: 814000, reward: 15.700000, loss: 0.000483, epsilon: 0.010000, episode:  442\n",
      "frames: 815000, reward: 15.700000, loss: 0.000209, epsilon: 0.010000, episode:  443\n",
      "frames: 816000, reward: 15.700000, loss: 0.000465, epsilon: 0.010000, episode:  443\n",
      "frames: 817000, reward: 15.900000, loss: 0.000475, epsilon: 0.010000, episode:  444\n",
      "frames: 818000, reward: 15.900000, loss: 0.005153, epsilon: 0.010000, episode:  444\n",
      "frames: 819000, reward: 15.800000, loss: 0.000335, epsilon: 0.010000, episode:  445\n",
      "frames: 820000, reward: 15.800000, loss: 0.000882, epsilon: 0.010000, episode:  445\n",
      "frames: 821000, reward: 15.800000, loss: 0.001085, epsilon: 0.010000, episode:  446\n",
      "frames: 822000, reward: 15.800000, loss: 0.000613, epsilon: 0.010000, episode:  446\n",
      "frames: 823000, reward: 15.800000, loss: 0.006800, epsilon: 0.010000, episode:  447\n",
      "frames: 824000, reward: 15.800000, loss: 0.000342, epsilon: 0.010000, episode:  447\n",
      "frames: 825000, reward: 16.100000, loss: 0.000540, epsilon: 0.010000, episode:  448\n",
      "frames: 826000, reward: 16.100000, loss: 0.003716, epsilon: 0.010000, episode:  448\n",
      "frames: 827000, reward: 16.100000, loss: 0.000711, epsilon: 0.010000, episode:  448\n",
      "frames: 828000, reward: 17.100000, loss: 0.000744, epsilon: 0.010000, episode:  449\n",
      "frames: 829000, reward: 17.100000, loss: 0.001057, epsilon: 0.010000, episode:  449\n",
      "frames: 830000, reward: 17.700000, loss: 0.000784, epsilon: 0.010000, episode:  450\n",
      "frames: 831000, reward: 17.700000, loss: 0.000565, epsilon: 0.010000, episode:  450\n",
      "frames: 832000, reward: 17.900000, loss: 0.000362, epsilon: 0.010000, episode:  451\n",
      "frames: 833000, reward: 17.900000, loss: 0.000576, epsilon: 0.010000, episode:  451\n",
      "frames: 834000, reward: 17.900000, loss: 0.000563, epsilon: 0.010000, episode:  451\n",
      "frames: 835000, reward: 18.600000, loss: 0.000384, epsilon: 0.010000, episode:  452\n",
      "frames: 836000, reward: 18.600000, loss: 0.002403, epsilon: 0.010000, episode:  452\n",
      "frames: 837000, reward: 18.600000, loss: 0.006121, epsilon: 0.010000, episode:  452\n",
      "frames: 838000, reward: 18.200000, loss: 0.002394, epsilon: 0.010000, episode:  453\n",
      "frames: 839000, reward: 18.200000, loss: 0.000840, epsilon: 0.010000, episode:  453\n",
      "frames: 840000, reward: 18.200000, loss: 0.002490, epsilon: 0.010000, episode:  453\n",
      "frames: 841000, reward: 18.000000, loss: 0.001282, epsilon: 0.010000, episode:  454\n",
      "frames: 842000, reward: 18.000000, loss: 0.000534, epsilon: 0.010000, episode:  454\n",
      "frames: 843000, reward: 17.400000, loss: 0.000459, epsilon: 0.010000, episode:  455\n",
      "frames: 844000, reward: 17.400000, loss: 0.000720, epsilon: 0.010000, episode:  455\n",
      "frames: 845000, reward: 17.400000, loss: 0.000575, epsilon: 0.010000, episode:  456\n",
      "frames: 846000, reward: 17.400000, loss: 0.001913, epsilon: 0.010000, episode:  456\n",
      "frames: 847000, reward: 17.400000, loss: 0.000448, epsilon: 0.010000, episode:  456\n",
      "frames: 848000, reward: 17.100000, loss: 0.000246, epsilon: 0.010000, episode:  457\n",
      "frames: 849000, reward: 17.100000, loss: 0.000796, epsilon: 0.010000, episode:  457\n",
      "frames: 850000, reward: 17.100000, loss: 0.000738, epsilon: 0.010000, episode:  457\n",
      "frames: 851000, reward: 16.500000, loss: 0.000521, epsilon: 0.010000, episode:  458\n",
      "frames: 852000, reward: 16.500000, loss: 0.000977, epsilon: 0.010000, episode:  458\n",
      "frames: 853000, reward: 16.500000, loss: 0.000510, epsilon: 0.010000, episode:  458\n",
      "frames: 854000, reward: 16.700000, loss: 0.000884, epsilon: 0.010000, episode:  459\n",
      "frames: 855000, reward: 16.700000, loss: 0.000989, epsilon: 0.010000, episode:  459\n",
      "frames: 856000, reward: 16.700000, loss: 0.000383, epsilon: 0.010000, episode:  459\n",
      "frames: 857000, reward: 17.000000, loss: 0.000342, epsilon: 0.010000, episode:  460\n",
      "frames: 858000, reward: 17.000000, loss: 0.000308, epsilon: 0.010000, episode:  460\n",
      "frames: 859000, reward: 17.000000, loss: 0.000750, epsilon: 0.010000, episode:  460\n",
      "frames: 860000, reward: 16.400000, loss: 0.001700, epsilon: 0.010000, episode:  461\n",
      "frames: 861000, reward: 16.400000, loss: 0.000514, epsilon: 0.010000, episode:  461\n",
      "frames: 862000, reward: 16.400000, loss: 0.000303, epsilon: 0.010000, episode:  461\n",
      "frames: 863000, reward: 16.300000, loss: 0.000768, epsilon: 0.010000, episode:  462\n",
      "frames: 864000, reward: 16.300000, loss: 0.000299, epsilon: 0.010000, episode:  462\n",
      "frames: 865000, reward: 16.600000, loss: 0.000185, epsilon: 0.010000, episode:  463\n",
      "frames: 866000, reward: 16.600000, loss: 0.000466, epsilon: 0.010000, episode:  463\n",
      "frames: 867000, reward: 17.000000, loss: 0.000492, epsilon: 0.010000, episode:  464\n",
      "frames: 868000, reward: 17.000000, loss: 0.001332, epsilon: 0.010000, episode:  464\n",
      "frames: 869000, reward: 16.900000, loss: 0.000947, epsilon: 0.010000, episode:  465\n",
      "frames: 870000, reward: 16.900000, loss: 0.004943, epsilon: 0.010000, episode:  465\n",
      "frames: 871000, reward: 16.900000, loss: 0.000558, epsilon: 0.010000, episode:  466\n",
      "frames: 872000, reward: 16.900000, loss: 0.000413, epsilon: 0.010000, episode:  466\n",
      "frames: 873000, reward: 16.900000, loss: 0.000480, epsilon: 0.010000, episode:  466\n",
      "frames: 874000, reward: 17.000000, loss: 0.000401, epsilon: 0.010000, episode:  467\n",
      "frames: 875000, reward: 17.000000, loss: 0.001188, epsilon: 0.010000, episode:  467\n",
      "frames: 876000, reward: 17.000000, loss: 0.000389, epsilon: 0.010000, episode:  467\n",
      "frames: 877000, reward: 17.100000, loss: 0.000494, epsilon: 0.010000, episode:  468\n",
      "frames: 878000, reward: 17.100000, loss: 0.001114, epsilon: 0.010000, episode:  468\n",
      "frames: 879000, reward: 17.400000, loss: 0.000391, epsilon: 0.010000, episode:  469\n",
      "frames: 880000, reward: 17.400000, loss: 0.001022, epsilon: 0.010000, episode:  469\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 881000, reward: 17.600000, loss: 0.000185, epsilon: 0.010000, episode:  470\n",
      "frames: 882000, reward: 17.600000, loss: 0.000587, epsilon: 0.010000, episode:  470\n",
      "frames: 883000, reward: 17.600000, loss: 0.005166, epsilon: 0.010000, episode:  471\n",
      "frames: 884000, reward: 17.600000, loss: 0.000361, epsilon: 0.010000, episode:  471\n",
      "frames: 885000, reward: 17.600000, loss: 0.000282, epsilon: 0.010000, episode:  471\n",
      "frames: 886000, reward: 17.400000, loss: 0.001501, epsilon: 0.010000, episode:  472\n",
      "frames: 887000, reward: 17.400000, loss: 0.000355, epsilon: 0.010000, episode:  472\n",
      "frames: 888000, reward: 17.500000, loss: 0.000473, epsilon: 0.010000, episode:  473\n",
      "frames: 889000, reward: 17.500000, loss: 0.000722, epsilon: 0.010000, episode:  473\n",
      "frames: 890000, reward: 17.400000, loss: 0.000499, epsilon: 0.010000, episode:  474\n",
      "frames: 891000, reward: 17.400000, loss: 0.001262, epsilon: 0.010000, episode:  474\n",
      "frames: 892000, reward: 17.400000, loss: 0.000647, epsilon: 0.010000, episode:  474\n",
      "frames: 893000, reward: 18.000000, loss: 0.000218, epsilon: 0.010000, episode:  475\n",
      "frames: 894000, reward: 18.000000, loss: 0.000594, epsilon: 0.010000, episode:  475\n",
      "frames: 895000, reward: 17.900000, loss: 0.000316, epsilon: 0.010000, episode:  476\n",
      "frames: 896000, reward: 17.900000, loss: 0.000724, epsilon: 0.010000, episode:  476\n",
      "frames: 897000, reward: 17.900000, loss: 0.000184, epsilon: 0.010000, episode:  476\n",
      "frames: 898000, reward: 17.800000, loss: 0.000769, epsilon: 0.010000, episode:  477\n",
      "frames: 899000, reward: 17.800000, loss: 0.000589, epsilon: 0.010000, episode:  477\n",
      "frames: 900000, reward: 18.000000, loss: 0.000672, epsilon: 0.010000, episode:  478\n",
      "frames: 901000, reward: 18.000000, loss: 0.001155, epsilon: 0.010000, episode:  478\n",
      "frames: 902000, reward: 18.000000, loss: 0.000700, epsilon: 0.010000, episode:  478\n",
      "frames: 903000, reward: 17.700000, loss: 0.000372, epsilon: 0.010000, episode:  479\n",
      "frames: 904000, reward: 17.700000, loss: 0.000766, epsilon: 0.010000, episode:  479\n",
      "frames: 905000, reward: 17.200000, loss: 0.000740, epsilon: 0.010000, episode:  480\n",
      "frames: 906000, reward: 17.200000, loss: 0.000998, epsilon: 0.010000, episode:  480\n",
      "frames: 907000, reward: 17.700000, loss: 0.000413, epsilon: 0.010000, episode:  481\n",
      "frames: 908000, reward: 17.700000, loss: 0.000391, epsilon: 0.010000, episode:  481\n",
      "frames: 909000, reward: 17.700000, loss: 0.000936, epsilon: 0.010000, episode:  481\n",
      "frames: 910000, reward: 17.300000, loss: 0.004240, epsilon: 0.010000, episode:  482\n",
      "frames: 911000, reward: 17.300000, loss: 0.000568, epsilon: 0.010000, episode:  482\n",
      "frames: 912000, reward: 17.400000, loss: 0.001164, epsilon: 0.010000, episode:  483\n",
      "frames: 913000, reward: 17.400000, loss: 0.000676, epsilon: 0.010000, episode:  483\n",
      "frames: 914000, reward: 17.400000, loss: 0.000479, epsilon: 0.010000, episode:  483\n",
      "frames: 915000, reward: 17.000000, loss: 0.001226, epsilon: 0.010000, episode:  484\n",
      "frames: 916000, reward: 17.000000, loss: 0.000745, epsilon: 0.010000, episode:  484\n",
      "frames: 917000, reward: 16.700000, loss: 0.000731, epsilon: 0.010000, episode:  485\n",
      "frames: 918000, reward: 16.700000, loss: 0.000453, epsilon: 0.010000, episode:  485\n",
      "frames: 919000, reward: 16.700000, loss: 0.000383, epsilon: 0.010000, episode:  485\n",
      "frames: 920000, reward: 16.700000, loss: 0.000846, epsilon: 0.010000, episode:  485\n",
      "frames: 921000, reward: 16.100000, loss: 0.000796, epsilon: 0.010000, episode:  486\n",
      "frames: 922000, reward: 16.100000, loss: 0.000432, epsilon: 0.010000, episode:  486\n",
      "frames: 923000, reward: 16.500000, loss: 0.001776, epsilon: 0.010000, episode:  487\n",
      "frames: 924000, reward: 16.500000, loss: 0.000540, epsilon: 0.010000, episode:  487\n",
      "frames: 925000, reward: 16.200000, loss: 0.000326, epsilon: 0.010000, episode:  488\n",
      "frames: 926000, reward: 16.200000, loss: 0.000419, epsilon: 0.010000, episode:  488\n",
      "frames: 927000, reward: 16.200000, loss: 0.001886, epsilon: 0.010000, episode:  488\n",
      "frames: 928000, reward: 16.500000, loss: 0.001242, epsilon: 0.010000, episode:  489\n",
      "frames: 929000, reward: 16.500000, loss: 0.000959, epsilon: 0.010000, episode:  489\n",
      "frames: 930000, reward: 16.900000, loss: 0.000728, epsilon: 0.010000, episode:  490\n",
      "frames: 931000, reward: 16.900000, loss: 0.000691, epsilon: 0.010000, episode:  490\n",
      "frames: 932000, reward: 16.700000, loss: 0.000841, epsilon: 0.010000, episode:  491\n",
      "frames: 933000, reward: 17.600000, loss: 0.000519, epsilon: 0.010000, episode:  492\n",
      "frames: 934000, reward: 17.600000, loss: 0.000504, epsilon: 0.010000, episode:  492\n",
      "frames: 935000, reward: 17.600000, loss: 0.000629, epsilon: 0.010000, episode:  492\n",
      "frames: 936000, reward: 17.600000, loss: 0.000288, epsilon: 0.010000, episode:  493\n",
      "frames: 937000, reward: 17.600000, loss: 0.000401, epsilon: 0.010000, episode:  493\n",
      "frames: 938000, reward: 18.000000, loss: 0.000545, epsilon: 0.010000, episode:  494\n",
      "frames: 939000, reward: 18.000000, loss: 0.000699, epsilon: 0.010000, episode:  494\n",
      "frames: 940000, reward: 18.400000, loss: 0.001146, epsilon: 0.010000, episode:  495\n",
      "frames: 941000, reward: 18.400000, loss: 0.000282, epsilon: 0.010000, episode:  495\n",
      "frames: 942000, reward: 19.000000, loss: 0.000556, epsilon: 0.010000, episode:  496\n",
      "frames: 943000, reward: 19.000000, loss: 0.000305, epsilon: 0.010000, episode:  496\n",
      "frames: 944000, reward: 18.700000, loss: 0.000380, epsilon: 0.010000, episode:  497\n",
      "frames: 945000, reward: 18.700000, loss: 0.000818, epsilon: 0.010000, episode:  497\n",
      "frames: 946000, reward: 18.700000, loss: 0.000928, epsilon: 0.010000, episode:  497\n",
      "frames: 947000, reward: 19.000000, loss: 0.000496, epsilon: 0.010000, episode:  498\n",
      "frames: 948000, reward: 19.000000, loss: 0.000669, epsilon: 0.010000, episode:  498\n",
      "frames: 949000, reward: 18.400000, loss: 0.000449, epsilon: 0.010000, episode:  499\n",
      "frames: 950000, reward: 18.400000, loss: 0.000614, epsilon: 0.010000, episode:  499\n",
      "frames: 951000, reward: 18.400000, loss: 0.000308, epsilon: 0.010000, episode:  499\n",
      "frames: 952000, reward: 18.500000, loss: 0.000352, epsilon: 0.010000, episode:  500\n",
      "frames: 953000, reward: 18.500000, loss: 0.001158, epsilon: 0.010000, episode:  500\n",
      "frames: 954000, reward: 18.500000, loss: 0.000551, epsilon: 0.010000, episode:  501\n",
      "frames: 955000, reward: 18.500000, loss: 0.000199, epsilon: 0.010000, episode:  501\n",
      "frames: 956000, reward: 18.300000, loss: 0.000840, epsilon: 0.010000, episode:  502\n",
      "frames: 957000, reward: 18.300000, loss: 0.000457, epsilon: 0.010000, episode:  502\n",
      "frames: 958000, reward: 18.200000, loss: 0.004253, epsilon: 0.010000, episode:  503\n",
      "frames: 959000, reward: 18.200000, loss: 0.000258, epsilon: 0.010000, episode:  503\n",
      "frames: 960000, reward: 17.500000, loss: 0.000193, epsilon: 0.010000, episode:  504\n",
      "frames: 961000, reward: 17.500000, loss: 0.000296, epsilon: 0.010000, episode:  504\n",
      "frames: 962000, reward: 17.500000, loss: 0.000484, epsilon: 0.010000, episode:  504\n",
      "frames: 963000, reward: 17.300000, loss: 0.000631, epsilon: 0.010000, episode:  505\n",
      "frames: 964000, reward: 17.300000, loss: 0.000205, epsilon: 0.010000, episode:  506\n",
      "frames: 965000, reward: 17.300000, loss: 0.000447, epsilon: 0.010000, episode:  506\n",
      "frames: 966000, reward: 17.600000, loss: 0.000960, epsilon: 0.010000, episode:  507\n",
      "frames: 967000, reward: 17.600000, loss: 0.000277, epsilon: 0.010000, episode:  507\n",
      "frames: 968000, reward: 17.900000, loss: 0.000832, epsilon: 0.010000, episode:  508\n",
      "frames: 969000, reward: 17.900000, loss: 0.000846, epsilon: 0.010000, episode:  508\n",
      "frames: 970000, reward: 17.900000, loss: 0.000750, epsilon: 0.010000, episode:  509\n",
      "frames: 971000, reward: 17.900000, loss: 0.000318, epsilon: 0.010000, episode:  509\n",
      "frames: 972000, reward: 17.900000, loss: 0.000384, epsilon: 0.010000, episode:  509\n",
      "frames: 973000, reward: 17.800000, loss: 0.000207, epsilon: 0.010000, episode:  510\n",
      "frames: 974000, reward: 17.800000, loss: 0.000427, epsilon: 0.010000, episode:  510\n",
      "frames: 975000, reward: 17.900000, loss: 0.000210, epsilon: 0.010000, episode:  511\n",
      "frames: 976000, reward: 17.900000, loss: 0.000278, epsilon: 0.010000, episode:  511\n",
      "frames: 977000, reward: 17.900000, loss: 0.000414, epsilon: 0.010000, episode:  512\n",
      "frames: 978000, reward: 17.900000, loss: 0.000501, epsilon: 0.010000, episode:  512\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 979000, reward: 17.900000, loss: 0.000399, epsilon: 0.010000, episode:  512\n",
      "frames: 980000, reward: 17.900000, loss: 0.001210, epsilon: 0.010000, episode:  513\n",
      "frames: 981000, reward: 17.900000, loss: 0.000654, epsilon: 0.010000, episode:  513\n",
      "frames: 982000, reward: 17.900000, loss: 0.000593, epsilon: 0.010000, episode:  513\n",
      "frames: 983000, reward: 18.000000, loss: 0.001259, epsilon: 0.010000, episode:  514\n",
      "frames: 984000, reward: 18.000000, loss: 0.000258, epsilon: 0.010000, episode:  514\n",
      "frames: 985000, reward: 18.000000, loss: 0.001165, epsilon: 0.010000, episode:  514\n",
      "frames: 986000, reward: 18.100000, loss: 0.000472, epsilon: 0.010000, episode:  515\n",
      "frames: 987000, reward: 18.100000, loss: 0.000391, epsilon: 0.010000, episode:  515\n",
      "frames: 988000, reward: 18.300000, loss: 0.000209, epsilon: 0.010000, episode:  516\n",
      "frames: 989000, reward: 18.300000, loss: 0.000810, epsilon: 0.010000, episode:  516\n",
      "frames: 990000, reward: 18.200000, loss: 0.000343, epsilon: 0.010000, episode:  517\n",
      "frames: 991000, reward: 18.200000, loss: 0.000424, epsilon: 0.010000, episode:  517\n",
      "frames: 992000, reward: 18.200000, loss: 0.001002, epsilon: 0.010000, episode:  517\n",
      "frames: 993000, reward: 17.500000, loss: 0.000149, epsilon: 0.010000, episode:  518\n",
      "frames: 994000, reward: 17.500000, loss: 0.000346, epsilon: 0.010000, episode:  518\n",
      "frames: 995000, reward: 17.500000, loss: 0.000419, epsilon: 0.010000, episode:  518\n",
      "frames: 996000, reward: 18.000000, loss: 0.000915, epsilon: 0.010000, episode:  519\n",
      "frames: 997000, reward: 18.000000, loss: 0.000341, epsilon: 0.010000, episode:  519\n",
      "frames: 998000, reward: 17.700000, loss: 0.000245, epsilon: 0.010000, episode:  520\n",
      "frames: 999000, reward: 17.700000, loss: 0.001429, epsilon: 0.010000, episode:  520\n",
      "frames: 1000000, reward: 17.700000, loss: 0.000402, epsilon: 0.010000, episode:  521\n",
      "frames: 1001000, reward: 17.700000, loss: 0.000561, epsilon: 0.010000, episode:  521\n",
      "frames: 1002000, reward: 17.700000, loss: 0.000434, epsilon: 0.010000, episode:  521\n",
      "frames: 1003000, reward: 17.800000, loss: 0.000264, epsilon: 0.010000, episode:  522\n",
      "frames: 1004000, reward: 17.800000, loss: 0.000601, epsilon: 0.010000, episode:  522\n",
      "frames: 1005000, reward: 17.900000, loss: 0.000456, epsilon: 0.010000, episode:  523\n",
      "frames: 1006000, reward: 17.900000, loss: 0.000380, epsilon: 0.010000, episode:  523\n",
      "frames: 1007000, reward: 17.900000, loss: 0.000299, epsilon: 0.010000, episode:  523\n",
      "frames: 1008000, reward: 18.000000, loss: 0.000423, epsilon: 0.010000, episode:  524\n",
      "frames: 1009000, reward: 18.300000, loss: 0.000481, epsilon: 0.010000, episode:  525\n",
      "frames: 1010000, reward: 18.300000, loss: 0.000438, epsilon: 0.010000, episode:  525\n",
      "frames: 1011000, reward: 18.300000, loss: 0.000234, epsilon: 0.010000, episode:  525\n",
      "frames: 1012000, reward: 17.700000, loss: 0.000898, epsilon: 0.010000, episode:  526\n",
      "frames: 1013000, reward: 17.700000, loss: 0.000710, epsilon: 0.010000, episode:  526\n",
      "frames: 1014000, reward: 17.800000, loss: 0.000578, epsilon: 0.010000, episode:  527\n",
      "frames: 1015000, reward: 17.800000, loss: 0.003783, epsilon: 0.010000, episode:  527\n",
      "frames: 1016000, reward: 17.800000, loss: 0.000797, epsilon: 0.010000, episode:  527\n",
      "frames: 1017000, reward: 18.600000, loss: 0.000194, epsilon: 0.010000, episode:  528\n",
      "frames: 1018000, reward: 18.600000, loss: 0.000350, epsilon: 0.010000, episode:  528\n",
      "frames: 1019000, reward: 18.300000, loss: 0.002168, epsilon: 0.010000, episode:  529\n",
      "frames: 1020000, reward: 18.300000, loss: 0.000564, epsilon: 0.010000, episode:  529\n",
      "frames: 1021000, reward: 18.300000, loss: 0.000631, epsilon: 0.010000, episode:  529\n",
      "frames: 1022000, reward: 18.600000, loss: 0.000854, epsilon: 0.010000, episode:  530\n",
      "frames: 1023000, reward: 18.600000, loss: 0.000348, epsilon: 0.010000, episode:  530\n",
      "frames: 1024000, reward: 18.700000, loss: 0.002086, epsilon: 0.010000, episode:  531\n",
      "frames: 1025000, reward: 18.700000, loss: 0.000334, epsilon: 0.010000, episode:  531\n",
      "frames: 1026000, reward: 18.800000, loss: 0.000486, epsilon: 0.010000, episode:  532\n",
      "frames: 1027000, reward: 18.800000, loss: 0.000353, epsilon: 0.010000, episode:  532\n",
      "frames: 1028000, reward: 18.700000, loss: 0.000243, epsilon: 0.010000, episode:  533\n",
      "frames: 1029000, reward: 18.700000, loss: 0.000633, epsilon: 0.010000, episode:  533\n",
      "frames: 1030000, reward: 19.300000, loss: 0.000434, epsilon: 0.010000, episode:  534\n",
      "frames: 1031000, reward: 19.300000, loss: 0.000245, epsilon: 0.010000, episode:  534\n",
      "frames: 1032000, reward: 19.300000, loss: 0.000295, epsilon: 0.010000, episode:  534\n",
      "frames: 1033000, reward: 18.900000, loss: 0.000434, epsilon: 0.010000, episode:  535\n",
      "frames: 1034000, reward: 18.900000, loss: 0.000270, epsilon: 0.010000, episode:  535\n",
      "frames: 1035000, reward: 18.900000, loss: 0.001109, epsilon: 0.010000, episode:  535\n",
      "frames: 1036000, reward: 19.400000, loss: 0.000453, epsilon: 0.010000, episode:  536\n",
      "frames: 1037000, reward: 19.400000, loss: 0.000251, epsilon: 0.010000, episode:  536\n",
      "frames: 1038000, reward: 19.300000, loss: 0.000236, epsilon: 0.010000, episode:  537\n",
      "frames: 1039000, reward: 19.300000, loss: 0.000119, epsilon: 0.010000, episode:  537\n",
      "frames: 1040000, reward: 19.200000, loss: 0.000427, epsilon: 0.010000, episode:  538\n",
      "frames: 1041000, reward: 19.200000, loss: 0.000479, epsilon: 0.010000, episode:  538\n",
      "frames: 1042000, reward: 19.300000, loss: 0.000824, epsilon: 0.010000, episode:  539\n",
      "frames: 1043000, reward: 19.300000, loss: 0.000201, epsilon: 0.010000, episode:  539\n",
      "frames: 1044000, reward: 19.300000, loss: 0.000455, epsilon: 0.010000, episode:  540\n",
      "frames: 1045000, reward: 19.300000, loss: 0.000258, epsilon: 0.010000, episode:  540\n",
      "frames: 1046000, reward: 19.400000, loss: 0.000149, epsilon: 0.010000, episode:  541\n",
      "frames: 1047000, reward: 19.400000, loss: 0.000513, epsilon: 0.010000, episode:  541\n",
      "frames: 1048000, reward: 19.400000, loss: 0.000272, epsilon: 0.010000, episode:  541\n",
      "frames: 1049000, reward: 19.000000, loss: 0.000742, epsilon: 0.010000, episode:  542\n",
      "frames: 1050000, reward: 19.000000, loss: 0.000417, epsilon: 0.010000, episode:  543\n",
      "frames: 1051000, reward: 19.000000, loss: 0.000362, epsilon: 0.010000, episode:  543\n",
      "frames: 1052000, reward: 18.600000, loss: 0.000565, epsilon: 0.010000, episode:  544\n",
      "frames: 1053000, reward: 18.600000, loss: 0.000634, epsilon: 0.010000, episode:  544\n",
      "frames: 1054000, reward: 18.900000, loss: 0.000618, epsilon: 0.010000, episode:  545\n",
      "frames: 1055000, reward: 18.900000, loss: 0.000284, epsilon: 0.010000, episode:  545\n",
      "frames: 1056000, reward: 18.900000, loss: 0.000266, epsilon: 0.010000, episode:  546\n",
      "frames: 1057000, reward: 18.900000, loss: 0.000346, epsilon: 0.010000, episode:  546\n",
      "frames: 1058000, reward: 18.900000, loss: 0.000753, epsilon: 0.010000, episode:  546\n",
      "frames: 1059000, reward: 19.000000, loss: 0.000237, epsilon: 0.010000, episode:  547\n",
      "frames: 1060000, reward: 19.000000, loss: 0.000371, epsilon: 0.010000, episode:  547\n",
      "frames: 1061000, reward: 18.700000, loss: 0.000407, epsilon: 0.010000, episode:  548\n",
      "frames: 1062000, reward: 18.700000, loss: 0.000253, epsilon: 0.010000, episode:  548\n",
      "frames: 1063000, reward: 18.900000, loss: 0.000925, epsilon: 0.010000, episode:  549\n",
      "frames: 1064000, reward: 18.900000, loss: 0.000431, epsilon: 0.010000, episode:  549\n",
      "frames: 1065000, reward: 18.900000, loss: 0.000441, epsilon: 0.010000, episode:  549\n",
      "frames: 1066000, reward: 18.600000, loss: 0.000235, epsilon: 0.010000, episode:  550\n",
      "frames: 1067000, reward: 18.600000, loss: 0.000288, epsilon: 0.010000, episode:  550\n",
      "frames: 1068000, reward: 18.400000, loss: 0.000349, epsilon: 0.010000, episode:  551\n",
      "frames: 1069000, reward: 18.400000, loss: 0.000302, epsilon: 0.010000, episode:  551\n",
      "frames: 1070000, reward: 18.600000, loss: 0.000678, epsilon: 0.010000, episode:  552\n",
      "frames: 1071000, reward: 18.600000, loss: 0.000284, epsilon: 0.010000, episode:  552\n",
      "frames: 1072000, reward: 18.300000, loss: 0.000387, epsilon: 0.010000, episode:  553\n",
      "frames: 1073000, reward: 18.300000, loss: 0.000333, epsilon: 0.010000, episode:  553\n",
      "frames: 1074000, reward: 18.500000, loss: 0.000172, epsilon: 0.010000, episode:  554\n",
      "frames: 1075000, reward: 18.500000, loss: 0.000215, epsilon: 0.010000, episode:  554\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1076000, reward: 18.500000, loss: 0.000467, epsilon: 0.010000, episode:  554\n",
      "frames: 1077000, reward: 18.600000, loss: 0.000988, epsilon: 0.010000, episode:  555\n",
      "frames: 1078000, reward: 18.600000, loss: 0.000565, epsilon: 0.010000, episode:  555\n",
      "frames: 1079000, reward: 18.500000, loss: 0.000326, epsilon: 0.010000, episode:  556\n",
      "frames: 1080000, reward: 18.500000, loss: 0.000333, epsilon: 0.010000, episode:  556\n",
      "frames: 1081000, reward: 18.600000, loss: 0.000643, epsilon: 0.010000, episode:  557\n",
      "frames: 1082000, reward: 18.600000, loss: 0.000319, epsilon: 0.010000, episode:  557\n",
      "frames: 1083000, reward: 18.600000, loss: 0.000329, epsilon: 0.010000, episode:  557\n",
      "frames: 1084000, reward: 19.000000, loss: 0.000367, epsilon: 0.010000, episode:  558\n",
      "frames: 1085000, reward: 19.000000, loss: 0.000505, epsilon: 0.010000, episode:  558\n",
      "frames: 1086000, reward: 19.100000, loss: 0.001216, epsilon: 0.010000, episode:  559\n",
      "frames: 1087000, reward: 19.100000, loss: 0.000577, epsilon: 0.010000, episode:  559\n",
      "frames: 1088000, reward: 19.000000, loss: 0.000202, epsilon: 0.010000, episode:  560\n",
      "frames: 1089000, reward: 19.000000, loss: 0.000200, epsilon: 0.010000, episode:  560\n",
      "frames: 1090000, reward: 19.200000, loss: 0.000260, epsilon: 0.010000, episode:  561\n",
      "frames: 1091000, reward: 19.200000, loss: 0.000576, epsilon: 0.010000, episode:  561\n",
      "frames: 1092000, reward: 19.400000, loss: 0.004730, epsilon: 0.010000, episode:  562\n",
      "frames: 1093000, reward: 19.400000, loss: 0.000467, epsilon: 0.010000, episode:  562\n",
      "frames: 1094000, reward: 19.500000, loss: 0.000476, epsilon: 0.010000, episode:  563\n",
      "frames: 1095000, reward: 19.500000, loss: 0.000199, epsilon: 0.010000, episode:  563\n",
      "frames: 1096000, reward: 19.600000, loss: 0.003148, epsilon: 0.010000, episode:  564\n",
      "frames: 1097000, reward: 19.600000, loss: 0.000302, epsilon: 0.010000, episode:  564\n",
      "frames: 1098000, reward: 19.500000, loss: 0.000213, epsilon: 0.010000, episode:  565\n",
      "frames: 1099000, reward: 19.500000, loss: 0.000261, epsilon: 0.010000, episode:  565\n",
      "frames: 1100000, reward: 19.200000, loss: 0.000679, epsilon: 0.010000, episode:  566\n",
      "frames: 1101000, reward: 19.200000, loss: 0.000256, epsilon: 0.010000, episode:  566\n",
      "frames: 1102000, reward: 19.100000, loss: 0.000719, epsilon: 0.010000, episode:  567\n",
      "frames: 1103000, reward: 19.100000, loss: 0.000294, epsilon: 0.010000, episode:  567\n",
      "frames: 1104000, reward: 18.900000, loss: 0.000355, epsilon: 0.010000, episode:  568\n",
      "frames: 1105000, reward: 18.900000, loss: 0.000216, epsilon: 0.010000, episode:  568\n",
      "frames: 1106000, reward: 18.300000, loss: 0.000273, epsilon: 0.010000, episode:  569\n",
      "frames: 1107000, reward: 18.300000, loss: 0.000376, epsilon: 0.010000, episode:  569\n",
      "frames: 1108000, reward: 18.300000, loss: 0.000291, epsilon: 0.010000, episode:  569\n",
      "frames: 1109000, reward: 18.600000, loss: 0.000375, epsilon: 0.010000, episode:  570\n",
      "frames: 1110000, reward: 18.600000, loss: 0.000352, epsilon: 0.010000, episode:  570\n",
      "frames: 1111000, reward: 18.600000, loss: 0.000260, epsilon: 0.010000, episode:  571\n",
      "frames: 1112000, reward: 18.600000, loss: 0.000235, epsilon: 0.010000, episode:  571\n",
      "frames: 1113000, reward: 18.300000, loss: 0.000338, epsilon: 0.010000, episode:  572\n",
      "frames: 1114000, reward: 18.300000, loss: 0.000422, epsilon: 0.010000, episode:  572\n",
      "frames: 1115000, reward: 18.600000, loss: 0.000441, epsilon: 0.010000, episode:  573\n",
      "frames: 1116000, reward: 18.600000, loss: 0.001520, epsilon: 0.010000, episode:  573\n",
      "frames: 1117000, reward: 18.700000, loss: 0.000354, epsilon: 0.010000, episode:  574\n",
      "frames: 1118000, reward: 18.700000, loss: 0.000243, epsilon: 0.010000, episode:  574\n",
      "frames: 1119000, reward: 18.500000, loss: 0.000347, epsilon: 0.010000, episode:  575\n",
      "frames: 1120000, reward: 18.800000, loss: 0.000242, epsilon: 0.010000, episode:  576\n",
      "frames: 1121000, reward: 18.800000, loss: 0.003013, epsilon: 0.010000, episode:  576\n",
      "frames: 1122000, reward: 18.600000, loss: 0.000660, epsilon: 0.010000, episode:  577\n",
      "frames: 1123000, reward: 18.600000, loss: 0.000387, epsilon: 0.010000, episode:  577\n",
      "frames: 1124000, reward: 18.600000, loss: 0.000328, epsilon: 0.010000, episode:  578\n",
      "frames: 1125000, reward: 18.600000, loss: 0.000281, epsilon: 0.010000, episode:  578\n",
      "frames: 1126000, reward: 19.300000, loss: 0.000216, epsilon: 0.010000, episode:  579\n",
      "frames: 1127000, reward: 19.300000, loss: 0.000266, epsilon: 0.010000, episode:  579\n",
      "frames: 1128000, reward: 19.200000, loss: 0.004942, epsilon: 0.010000, episode:  580\n",
      "frames: 1129000, reward: 19.200000, loss: 0.000306, epsilon: 0.010000, episode:  580\n",
      "frames: 1130000, reward: 19.200000, loss: 0.000232, epsilon: 0.010000, episode:  581\n",
      "frames: 1131000, reward: 19.200000, loss: 0.000353, epsilon: 0.010000, episode:  581\n",
      "frames: 1132000, reward: 19.400000, loss: 0.000426, epsilon: 0.010000, episode:  582\n",
      "frames: 1133000, reward: 19.000000, loss: 0.000328, epsilon: 0.010000, episode:  583\n",
      "frames: 1134000, reward: 19.000000, loss: 0.000438, epsilon: 0.010000, episode:  583\n",
      "frames: 1135000, reward: 19.000000, loss: 0.000252, epsilon: 0.010000, episode:  584\n",
      "frames: 1136000, reward: 19.000000, loss: 0.000187, epsilon: 0.010000, episode:  584\n",
      "frames: 1137000, reward: 19.000000, loss: 0.000385, epsilon: 0.010000, episode:  584\n",
      "frames: 1138000, reward: 19.200000, loss: 0.003203, epsilon: 0.010000, episode:  585\n",
      "frames: 1139000, reward: 19.200000, loss: 0.000149, epsilon: 0.010000, episode:  585\n",
      "frames: 1140000, reward: 19.300000, loss: 0.000217, epsilon: 0.010000, episode:  586\n",
      "frames: 1141000, reward: 19.300000, loss: 0.000370, epsilon: 0.010000, episode:  586\n",
      "frames: 1142000, reward: 19.300000, loss: 0.000393, epsilon: 0.010000, episode:  586\n",
      "frames: 1143000, reward: 19.300000, loss: 0.001094, epsilon: 0.010000, episode:  587\n",
      "frames: 1144000, reward: 19.300000, loss: 0.000284, epsilon: 0.010000, episode:  587\n",
      "frames: 1145000, reward: 19.300000, loss: 0.000248, epsilon: 0.010000, episode:  588\n",
      "frames: 1146000, reward: 19.300000, loss: 0.000499, epsilon: 0.010000, episode:  588\n",
      "frames: 1147000, reward: 19.300000, loss: 0.000978, epsilon: 0.010000, episode:  589\n",
      "frames: 1148000, reward: 19.300000, loss: 0.000565, epsilon: 0.010000, episode:  589\n",
      "frames: 1149000, reward: 19.300000, loss: 0.000452, epsilon: 0.010000, episode:  589\n",
      "frames: 1150000, reward: 19.600000, loss: 0.000272, epsilon: 0.010000, episode:  590\n",
      "frames: 1151000, reward: 19.600000, loss: 0.000199, epsilon: 0.010000, episode:  590\n",
      "frames: 1152000, reward: 19.700000, loss: 0.000159, epsilon: 0.010000, episode:  591\n",
      "frames: 1153000, reward: 19.700000, loss: 0.000294, epsilon: 0.010000, episode:  591\n",
      "frames: 1154000, reward: 19.700000, loss: 0.000301, epsilon: 0.010000, episode:  592\n",
      "frames: 1155000, reward: 19.700000, loss: 0.000201, epsilon: 0.010000, episode:  592\n",
      "frames: 1156000, reward: 20.100000, loss: 0.000500, epsilon: 0.010000, episode:  593\n",
      "frames: 1157000, reward: 20.100000, loss: 0.000199, epsilon: 0.010000, episode:  593\n",
      "frames: 1158000, reward: 20.100000, loss: 0.000225, epsilon: 0.010000, episode:  593\n",
      "frames: 1159000, reward: 19.600000, loss: 0.000370, epsilon: 0.010000, episode:  594\n",
      "frames: 1160000, reward: 19.600000, loss: 0.000987, epsilon: 0.010000, episode:  594\n",
      "frames: 1161000, reward: 19.500000, loss: 0.000393, epsilon: 0.010000, episode:  595\n",
      "frames: 1162000, reward: 19.500000, loss: 0.000306, epsilon: 0.010000, episode:  595\n",
      "frames: 1163000, reward: 19.300000, loss: 0.000169, epsilon: 0.010000, episode:  596\n",
      "frames: 1164000, reward: 19.300000, loss: 0.000156, epsilon: 0.010000, episode:  596\n",
      "frames: 1165000, reward: 19.300000, loss: 0.000412, epsilon: 0.010000, episode:  596\n",
      "frames: 1166000, reward: 19.400000, loss: 0.000277, epsilon: 0.010000, episode:  597\n",
      "frames: 1167000, reward: 19.400000, loss: 0.000370, epsilon: 0.010000, episode:  597\n",
      "frames: 1168000, reward: 19.400000, loss: 0.000344, epsilon: 0.010000, episode:  598\n",
      "frames: 1169000, reward: 19.400000, loss: 0.000565, epsilon: 0.010000, episode:  599\n",
      "frames: 1170000, reward: 19.400000, loss: 0.000223, epsilon: 0.010000, episode:  599\n",
      "frames: 1171000, reward: 19.200000, loss: 0.000231, epsilon: 0.010000, episode:  600\n",
      "frames: 1172000, reward: 19.200000, loss: 0.003570, epsilon: 0.010000, episode:  600\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1173000, reward: 19.200000, loss: 0.000714, epsilon: 0.010000, episode:  600\n",
      "frames: 1174000, reward: 19.200000, loss: 0.000301, epsilon: 0.010000, episode:  601\n",
      "frames: 1175000, reward: 19.200000, loss: 0.000715, epsilon: 0.010000, episode:  601\n",
      "frames: 1176000, reward: 19.200000, loss: 0.000239, epsilon: 0.010000, episode:  602\n",
      "frames: 1177000, reward: 19.200000, loss: 0.000231, epsilon: 0.010000, episode:  602\n",
      "frames: 1178000, reward: 19.200000, loss: 0.000218, epsilon: 0.010000, episode:  603\n",
      "frames: 1179000, reward: 19.200000, loss: 0.000220, epsilon: 0.010000, episode:  603\n",
      "frames: 1180000, reward: 19.500000, loss: 0.000216, epsilon: 0.010000, episode:  604\n",
      "frames: 1181000, reward: 19.500000, loss: 0.000265, epsilon: 0.010000, episode:  604\n",
      "frames: 1182000, reward: 19.500000, loss: 0.000156, epsilon: 0.010000, episode:  605\n",
      "frames: 1183000, reward: 19.500000, loss: 0.000439, epsilon: 0.010000, episode:  605\n",
      "frames: 1184000, reward: 19.500000, loss: 0.000172, epsilon: 0.010000, episode:  605\n",
      "frames: 1185000, reward: 19.500000, loss: 0.000169, epsilon: 0.010000, episode:  606\n",
      "frames: 1186000, reward: 19.500000, loss: 0.000150, epsilon: 0.010000, episode:  606\n",
      "frames: 1187000, reward: 19.700000, loss: 0.000467, epsilon: 0.010000, episode:  607\n",
      "frames: 1188000, reward: 19.700000, loss: 0.000998, epsilon: 0.010000, episode:  607\n",
      "frames: 1189000, reward: 19.700000, loss: 0.000507, epsilon: 0.010000, episode:  608\n",
      "frames: 1190000, reward: 19.700000, loss: 0.000332, epsilon: 0.010000, episode:  608\n",
      "frames: 1191000, reward: 19.700000, loss: 0.000973, epsilon: 0.010000, episode:  608\n",
      "frames: 1192000, reward: 19.700000, loss: 0.000483, epsilon: 0.010000, episode:  609\n",
      "frames: 1193000, reward: 19.700000, loss: 0.000117, epsilon: 0.010000, episode:  609\n",
      "frames: 1194000, reward: 19.700000, loss: 0.000272, epsilon: 0.010000, episode:  610\n",
      "frames: 1195000, reward: 19.700000, loss: 0.000254, epsilon: 0.010000, episode:  610\n",
      "frames: 1196000, reward: 19.400000, loss: 0.000289, epsilon: 0.010000, episode:  611\n",
      "frames: 1197000, reward: 19.400000, loss: 0.000221, epsilon: 0.010000, episode:  611\n",
      "frames: 1198000, reward: 19.600000, loss: 0.000232, epsilon: 0.010000, episode:  612\n",
      "frames: 1199000, reward: 19.600000, loss: 0.000565, epsilon: 0.010000, episode:  613\n",
      "frames: 1200000, reward: 19.600000, loss: 0.000298, epsilon: 0.010000, episode:  613\n",
      "frames: 1201000, reward: 19.600000, loss: 0.000201, epsilon: 0.010000, episode:  613\n",
      "frames: 1202000, reward: 19.700000, loss: 0.000144, epsilon: 0.010000, episode:  614\n",
      "frames: 1203000, reward: 19.700000, loss: 0.000148, epsilon: 0.010000, episode:  614\n",
      "frames: 1204000, reward: 19.300000, loss: 0.000196, epsilon: 0.010000, episode:  615\n",
      "frames: 1205000, reward: 19.300000, loss: 0.000347, epsilon: 0.010000, episode:  615\n",
      "frames: 1206000, reward: 19.600000, loss: 0.000690, epsilon: 0.010000, episode:  616\n",
      "frames: 1207000, reward: 19.600000, loss: 0.000435, epsilon: 0.010000, episode:  616\n",
      "frames: 1208000, reward: 19.400000, loss: 0.003174, epsilon: 0.010000, episode:  617\n",
      "frames: 1209000, reward: 19.400000, loss: 0.000354, epsilon: 0.010000, episode:  617\n",
      "frames: 1210000, reward: 19.600000, loss: 0.000363, epsilon: 0.010000, episode:  618\n",
      "frames: 1211000, reward: 19.600000, loss: 0.000202, epsilon: 0.010000, episode:  618\n",
      "frames: 1212000, reward: 19.600000, loss: 0.000149, epsilon: 0.010000, episode:  619\n",
      "frames: 1213000, reward: 19.600000, loss: 0.000216, epsilon: 0.010000, episode:  619\n",
      "frames: 1214000, reward: 19.700000, loss: 0.000463, epsilon: 0.010000, episode:  620\n",
      "frames: 1215000, reward: 19.700000, loss: 0.000372, epsilon: 0.010000, episode:  620\n",
      "frames: 1216000, reward: 19.700000, loss: 0.000882, epsilon: 0.010000, episode:  620\n",
      "frames: 1217000, reward: 19.200000, loss: 0.000254, epsilon: 0.010000, episode:  621\n",
      "frames: 1218000, reward: 19.200000, loss: 0.000179, epsilon: 0.010000, episode:  621\n",
      "frames: 1219000, reward: 18.900000, loss: 0.000527, epsilon: 0.010000, episode:  622\n",
      "frames: 1220000, reward: 18.900000, loss: 0.000275, epsilon: 0.010000, episode:  622\n",
      "frames: 1221000, reward: 18.900000, loss: 0.000205, epsilon: 0.010000, episode:  622\n",
      "frames: 1222000, reward: 18.900000, loss: 0.000224, epsilon: 0.010000, episode:  623\n",
      "frames: 1223000, reward: 18.900000, loss: 0.000235, epsilon: 0.010000, episode:  623\n",
      "frames: 1224000, reward: 18.600000, loss: 0.000203, epsilon: 0.010000, episode:  624\n",
      "frames: 1225000, reward: 18.600000, loss: 0.000325, epsilon: 0.010000, episode:  624\n",
      "frames: 1226000, reward: 18.600000, loss: 0.000237, epsilon: 0.010000, episode:  624\n",
      "frames: 1227000, reward: 19.200000, loss: 0.000874, epsilon: 0.010000, episode:  625\n",
      "frames: 1228000, reward: 19.200000, loss: 0.000542, epsilon: 0.010000, episode:  625\n",
      "frames: 1229000, reward: 18.600000, loss: 0.000407, epsilon: 0.010000, episode:  626\n",
      "frames: 1230000, reward: 18.600000, loss: 0.000308, epsilon: 0.010000, episode:  626\n",
      "frames: 1231000, reward: 18.600000, loss: 0.000347, epsilon: 0.010000, episode:  627\n",
      "frames: 1232000, reward: 18.600000, loss: 0.000394, epsilon: 0.010000, episode:  627\n",
      "frames: 1233000, reward: 17.900000, loss: 0.000341, epsilon: 0.010000, episode:  628\n",
      "frames: 1234000, reward: 17.900000, loss: 0.000219, epsilon: 0.010000, episode:  628\n",
      "frames: 1235000, reward: 17.900000, loss: 0.000122, epsilon: 0.010000, episode:  628\n",
      "frames: 1236000, reward: 16.900000, loss: 0.000408, epsilon: 0.010000, episode:  629\n",
      "frames: 1237000, reward: 16.900000, loss: 0.000575, epsilon: 0.010000, episode:  629\n",
      "frames: 1238000, reward: 16.900000, loss: 0.000154, epsilon: 0.010000, episode:  629\n",
      "frames: 1239000, reward: 16.000000, loss: 0.000526, epsilon: 0.010000, episode:  630\n",
      "frames: 1240000, reward: 16.000000, loss: 0.000218, epsilon: 0.010000, episode:  630\n",
      "frames: 1241000, reward: 16.700000, loss: 0.000427, epsilon: 0.010000, episode:  631\n",
      "frames: 1242000, reward: 16.700000, loss: 0.000372, epsilon: 0.010000, episode:  631\n",
      "frames: 1243000, reward: 16.400000, loss: 0.001037, epsilon: 0.010000, episode:  632\n",
      "frames: 1244000, reward: 16.400000, loss: 0.000541, epsilon: 0.010000, episode:  632\n",
      "frames: 1245000, reward: 16.400000, loss: 0.000232, epsilon: 0.010000, episode:  632\n",
      "frames: 1246000, reward: 15.600000, loss: 0.001060, epsilon: 0.010000, episode:  633\n",
      "frames: 1247000, reward: 15.600000, loss: 0.000998, epsilon: 0.010000, episode:  633\n",
      "frames: 1248000, reward: 15.500000, loss: 0.000978, epsilon: 0.010000, episode:  634\n",
      "frames: 1249000, reward: 15.500000, loss: 0.000318, epsilon: 0.010000, episode:  634\n",
      "frames: 1250000, reward: 15.500000, loss: 0.000414, epsilon: 0.010000, episode:  634\n",
      "frames: 1251000, reward: 15.400000, loss: 0.000714, epsilon: 0.010000, episode:  635\n",
      "frames: 1252000, reward: 16.000000, loss: 0.001057, epsilon: 0.010000, episode:  636\n",
      "frames: 1253000, reward: 16.000000, loss: 0.001581, epsilon: 0.010000, episode:  636\n",
      "frames: 1254000, reward: 16.000000, loss: 0.000278, epsilon: 0.010000, episode:  636\n",
      "frames: 1255000, reward: 15.500000, loss: 0.000557, epsilon: 0.010000, episode:  637\n",
      "frames: 1256000, reward: 15.500000, loss: 0.000312, epsilon: 0.010000, episode:  637\n",
      "frames: 1257000, reward: 16.000000, loss: 0.000171, epsilon: 0.010000, episode:  638\n",
      "frames: 1258000, reward: 16.000000, loss: 0.000347, epsilon: 0.010000, episode:  638\n",
      "frames: 1259000, reward: 16.000000, loss: 0.000239, epsilon: 0.010000, episode:  638\n",
      "frames: 1260000, reward: 17.000000, loss: 0.000292, epsilon: 0.010000, episode:  639\n",
      "frames: 1261000, reward: 17.000000, loss: 0.000281, epsilon: 0.010000, episode:  639\n",
      "frames: 1262000, reward: 17.000000, loss: 0.000512, epsilon: 0.010000, episode:  639\n",
      "frames: 1263000, reward: 17.400000, loss: 0.000816, epsilon: 0.010000, episode:  640\n",
      "frames: 1264000, reward: 17.400000, loss: 0.000303, epsilon: 0.010000, episode:  640\n",
      "frames: 1265000, reward: 17.200000, loss: 0.000353, epsilon: 0.010000, episode:  641\n",
      "frames: 1266000, reward: 17.200000, loss: 0.000318, epsilon: 0.010000, episode:  641\n",
      "frames: 1267000, reward: 17.800000, loss: 0.000225, epsilon: 0.010000, episode:  642\n",
      "frames: 1268000, reward: 17.800000, loss: 0.000820, epsilon: 0.010000, episode:  642\n",
      "frames: 1269000, reward: 17.800000, loss: 0.000202, epsilon: 0.010000, episode:  642\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1270000, reward: 17.500000, loss: 0.000180, epsilon: 0.010000, episode:  643\n",
      "frames: 1271000, reward: 17.500000, loss: 0.000367, epsilon: 0.010000, episode:  643\n",
      "frames: 1272000, reward: 17.500000, loss: 0.000215, epsilon: 0.010000, episode:  643\n",
      "frames: 1273000, reward: 17.700000, loss: 0.000867, epsilon: 0.010000, episode:  644\n",
      "frames: 1274000, reward: 17.700000, loss: 0.000387, epsilon: 0.010000, episode:  644\n",
      "frames: 1275000, reward: 17.800000, loss: 0.000549, epsilon: 0.010000, episode:  645\n",
      "frames: 1276000, reward: 17.800000, loss: 0.000587, epsilon: 0.010000, episode:  645\n",
      "frames: 1277000, reward: 17.200000, loss: 0.000319, epsilon: 0.010000, episode:  646\n",
      "frames: 1278000, reward: 17.200000, loss: 0.000815, epsilon: 0.010000, episode:  646\n",
      "frames: 1279000, reward: 17.200000, loss: 0.000584, epsilon: 0.010000, episode:  646\n",
      "frames: 1280000, reward: 17.900000, loss: 0.000311, epsilon: 0.010000, episode:  647\n",
      "frames: 1281000, reward: 17.900000, loss: 0.000816, epsilon: 0.010000, episode:  647\n",
      "frames: 1282000, reward: 17.800000, loss: 0.000273, epsilon: 0.010000, episode:  648\n",
      "frames: 1283000, reward: 17.800000, loss: 0.000360, epsilon: 0.010000, episode:  648\n",
      "frames: 1284000, reward: 17.500000, loss: 0.000181, epsilon: 0.010000, episode:  649\n",
      "frames: 1285000, reward: 17.500000, loss: 0.000463, epsilon: 0.010000, episode:  649\n",
      "frames: 1286000, reward: 17.500000, loss: 0.000322, epsilon: 0.010000, episode:  649\n",
      "frames: 1287000, reward: 18.000000, loss: 0.000385, epsilon: 0.010000, episode:  650\n",
      "frames: 1288000, reward: 18.000000, loss: 0.001499, epsilon: 0.010000, episode:  650\n",
      "frames: 1289000, reward: 18.200000, loss: 0.000186, epsilon: 0.010000, episode:  651\n",
      "frames: 1290000, reward: 18.200000, loss: 0.000482, epsilon: 0.010000, episode:  652\n",
      "frames: 1291000, reward: 18.200000, loss: 0.000153, epsilon: 0.010000, episode:  652\n",
      "frames: 1292000, reward: 18.200000, loss: 0.000268, epsilon: 0.010000, episode:  652\n",
      "frames: 1293000, reward: 19.000000, loss: 0.000481, epsilon: 0.010000, episode:  653\n",
      "frames: 1294000, reward: 19.000000, loss: 0.000310, epsilon: 0.010000, episode:  653\n",
      "frames: 1295000, reward: 19.300000, loss: 0.000196, epsilon: 0.010000, episode:  654\n",
      "frames: 1296000, reward: 19.300000, loss: 0.000505, epsilon: 0.010000, episode:  654\n",
      "frames: 1297000, reward: 18.700000, loss: 0.000447, epsilon: 0.010000, episode:  655\n",
      "frames: 1298000, reward: 18.700000, loss: 0.000370, epsilon: 0.010000, episode:  655\n",
      "frames: 1299000, reward: 18.700000, loss: 0.000242, epsilon: 0.010000, episode:  655\n",
      "frames: 1300000, reward: 19.000000, loss: 0.003447, epsilon: 0.010000, episode:  656\n",
      "frames: 1301000, reward: 19.000000, loss: 0.000487, epsilon: 0.010000, episode:  656\n",
      "frames: 1302000, reward: 19.000000, loss: 0.000514, epsilon: 0.010000, episode:  657\n",
      "frames: 1303000, reward: 19.000000, loss: 0.000296, epsilon: 0.010000, episode:  657\n",
      "frames: 1304000, reward: 19.300000, loss: 0.000412, epsilon: 0.010000, episode:  658\n",
      "frames: 1305000, reward: 19.300000, loss: 0.000271, epsilon: 0.010000, episode:  658\n",
      "frames: 1306000, reward: 19.500000, loss: 0.000983, epsilon: 0.010000, episode:  659\n",
      "frames: 1307000, reward: 19.500000, loss: 0.000321, epsilon: 0.010000, episode:  659\n",
      "frames: 1308000, reward: 19.400000, loss: 0.000153, epsilon: 0.010000, episode:  660\n",
      "frames: 1309000, reward: 19.400000, loss: 0.000258, epsilon: 0.010000, episode:  660\n",
      "frames: 1310000, reward: 19.000000, loss: 0.000614, epsilon: 0.010000, episode:  661\n",
      "frames: 1311000, reward: 19.000000, loss: 0.000923, epsilon: 0.010000, episode:  661\n",
      "frames: 1312000, reward: 19.000000, loss: 0.000486, epsilon: 0.010000, episode:  662\n",
      "frames: 1313000, reward: 19.000000, loss: 0.000362, epsilon: 0.010000, episode:  662\n",
      "frames: 1314000, reward: 19.200000, loss: 0.000328, epsilon: 0.010000, episode:  663\n",
      "frames: 1315000, reward: 19.200000, loss: 0.000308, epsilon: 0.010000, episode:  663\n",
      "frames: 1316000, reward: 18.800000, loss: 0.000620, epsilon: 0.010000, episode:  664\n",
      "frames: 1317000, reward: 18.800000, loss: 0.000386, epsilon: 0.010000, episode:  664\n",
      "frames: 1318000, reward: 18.800000, loss: 0.000740, epsilon: 0.010000, episode:  664\n",
      "frames: 1319000, reward: 19.400000, loss: 0.002102, epsilon: 0.010000, episode:  665\n",
      "frames: 1320000, reward: 19.400000, loss: 0.000217, epsilon: 0.010000, episode:  665\n",
      "frames: 1321000, reward: 19.400000, loss: 0.000359, epsilon: 0.010000, episode:  665\n",
      "frames: 1322000, reward: 18.600000, loss: 0.000245, epsilon: 0.010000, episode:  666\n",
      "frames: 1323000, reward: 18.600000, loss: 0.000480, epsilon: 0.010000, episode:  666\n",
      "frames: 1324000, reward: 18.600000, loss: 0.000361, epsilon: 0.010000, episode:  667\n",
      "frames: 1325000, reward: 18.600000, loss: 0.000542, epsilon: 0.010000, episode:  667\n",
      "frames: 1326000, reward: 18.600000, loss: 0.000334, epsilon: 0.010000, episode:  667\n",
      "frames: 1327000, reward: 18.100000, loss: 0.000605, epsilon: 0.010000, episode:  668\n",
      "frames: 1328000, reward: 18.100000, loss: 0.000838, epsilon: 0.010000, episode:  668\n",
      "frames: 1329000, reward: 18.100000, loss: 0.000584, epsilon: 0.010000, episode:  669\n",
      "frames: 1330000, reward: 18.100000, loss: 0.001110, epsilon: 0.010000, episode:  669\n",
      "frames: 1331000, reward: 18.100000, loss: 0.000279, epsilon: 0.010000, episode:  669\n",
      "frames: 1332000, reward: 17.400000, loss: 0.000229, epsilon: 0.010000, episode:  670\n",
      "frames: 1333000, reward: 17.400000, loss: 0.000461, epsilon: 0.010000, episode:  670\n",
      "frames: 1334000, reward: 17.800000, loss: 0.000397, epsilon: 0.010000, episode:  671\n",
      "frames: 1335000, reward: 17.800000, loss: 0.000154, epsilon: 0.010000, episode:  671\n",
      "frames: 1336000, reward: 17.800000, loss: 0.000661, epsilon: 0.010000, episode:  671\n",
      "frames: 1337000, reward: 16.400000, loss: 0.000345, epsilon: 0.010000, episode:  672\n",
      "frames: 1338000, reward: 16.400000, loss: 0.001169, epsilon: 0.010000, episode:  672\n",
      "frames: 1339000, reward: 16.100000, loss: 0.000273, epsilon: 0.010000, episode:  673\n",
      "frames: 1340000, reward: 16.100000, loss: 0.000508, epsilon: 0.010000, episode:  673\n",
      "frames: 1341000, reward: 16.000000, loss: 0.000325, epsilon: 0.010000, episode:  674\n",
      "frames: 1342000, reward: 16.000000, loss: 0.000454, epsilon: 0.010000, episode:  674\n",
      "frames: 1343000, reward: 16.000000, loss: 0.000726, epsilon: 0.010000, episode:  674\n",
      "frames: 1344000, reward: 15.500000, loss: 0.000374, epsilon: 0.010000, episode:  675\n",
      "frames: 1345000, reward: 15.500000, loss: 0.000259, epsilon: 0.010000, episode:  675\n",
      "frames: 1346000, reward: 15.500000, loss: 0.000541, epsilon: 0.010000, episode:  675\n",
      "frames: 1347000, reward: 16.300000, loss: 0.000336, epsilon: 0.010000, episode:  676\n",
      "frames: 1348000, reward: 16.300000, loss: 0.000446, epsilon: 0.010000, episode:  676\n",
      "frames: 1349000, reward: 16.300000, loss: 0.000255, epsilon: 0.010000, episode:  677\n",
      "frames: 1350000, reward: 16.300000, loss: 0.000725, epsilon: 0.010000, episode:  677\n",
      "frames: 1351000, reward: 16.300000, loss: 0.000374, epsilon: 0.010000, episode:  677\n",
      "frames: 1352000, reward: 16.600000, loss: 0.000775, epsilon: 0.010000, episode:  678\n",
      "frames: 1353000, reward: 16.600000, loss: 0.000211, epsilon: 0.010000, episode:  678\n",
      "frames: 1354000, reward: 16.500000, loss: 0.000255, epsilon: 0.010000, episode:  679\n",
      "frames: 1355000, reward: 16.500000, loss: 0.000363, epsilon: 0.010000, episode:  679\n",
      "frames: 1356000, reward: 17.100000, loss: 0.000340, epsilon: 0.010000, episode:  680\n",
      "frames: 1357000, reward: 17.100000, loss: 0.000435, epsilon: 0.010000, episode:  680\n",
      "frames: 1358000, reward: 17.000000, loss: 0.000569, epsilon: 0.010000, episode:  681\n",
      "frames: 1359000, reward: 17.000000, loss: 0.000750, epsilon: 0.010000, episode:  681\n",
      "frames: 1360000, reward: 18.200000, loss: 0.000441, epsilon: 0.010000, episode:  682\n",
      "frames: 1361000, reward: 18.200000, loss: 0.000481, epsilon: 0.010000, episode:  682\n",
      "frames: 1362000, reward: 18.200000, loss: 0.004092, epsilon: 0.010000, episode:  682\n",
      "frames: 1363000, reward: 17.200000, loss: 0.001288, epsilon: 0.010000, episode:  683\n",
      "frames: 1364000, reward: 17.200000, loss: 0.000403, epsilon: 0.010000, episode:  683\n",
      "frames: 1365000, reward: 17.200000, loss: 0.000489, epsilon: 0.010000, episode:  683\n",
      "frames: 1366000, reward: 17.100000, loss: 0.000610, epsilon: 0.010000, episode:  684\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1367000, reward: 17.100000, loss: 0.000356, epsilon: 0.010000, episode:  684\n",
      "frames: 1368000, reward: 17.400000, loss: 0.002864, epsilon: 0.010000, episode:  685\n",
      "frames: 1369000, reward: 17.400000, loss: 0.000309, epsilon: 0.010000, episode:  685\n",
      "frames: 1370000, reward: 17.700000, loss: 0.000807, epsilon: 0.010000, episode:  686\n",
      "frames: 1371000, reward: 17.700000, loss: 0.000389, epsilon: 0.010000, episode:  686\n",
      "frames: 1372000, reward: 17.500000, loss: 0.000781, epsilon: 0.010000, episode:  687\n",
      "frames: 1373000, reward: 17.500000, loss: 0.000593, epsilon: 0.010000, episode:  687\n",
      "frames: 1374000, reward: 17.500000, loss: 0.000920, epsilon: 0.010000, episode:  688\n",
      "frames: 1375000, reward: 17.500000, loss: 0.000615, epsilon: 0.010000, episode:  688\n",
      "frames: 1376000, reward: 17.500000, loss: 0.000515, epsilon: 0.010000, episode:  689\n",
      "frames: 1377000, reward: 17.500000, loss: 0.000636, epsilon: 0.010000, episode:  689\n",
      "frames: 1378000, reward: 17.500000, loss: 0.000640, epsilon: 0.010000, episode:  689\n",
      "frames: 1379000, reward: 17.600000, loss: 0.000221, epsilon: 0.010000, episode:  690\n",
      "frames: 1380000, reward: 17.600000, loss: 0.000153, epsilon: 0.010000, episode:  690\n",
      "frames: 1381000, reward: 17.600000, loss: 0.001345, epsilon: 0.010000, episode:  690\n",
      "frames: 1382000, reward: 17.700000, loss: 0.000401, epsilon: 0.010000, episode:  691\n",
      "frames: 1383000, reward: 17.700000, loss: 0.000358, epsilon: 0.010000, episode:  691\n",
      "frames: 1384000, reward: 17.600000, loss: 0.000410, epsilon: 0.010000, episode:  692\n",
      "frames: 1385000, reward: 17.600000, loss: 0.000482, epsilon: 0.010000, episode:  692\n",
      "frames: 1386000, reward: 17.600000, loss: 0.000519, epsilon: 0.010000, episode:  692\n",
      "frames: 1387000, reward: 18.400000, loss: 0.000224, epsilon: 0.010000, episode:  693\n",
      "frames: 1388000, reward: 18.400000, loss: 0.000750, epsilon: 0.010000, episode:  693\n",
      "frames: 1389000, reward: 18.400000, loss: 0.000366, epsilon: 0.010000, episode:  693\n",
      "frames: 1390000, reward: 18.700000, loss: 0.000662, epsilon: 0.010000, episode:  694\n",
      "frames: 1391000, reward: 18.700000, loss: 0.000755, epsilon: 0.010000, episode:  694\n",
      "frames: 1392000, reward: 18.500000, loss: 0.000308, epsilon: 0.010000, episode:  695\n",
      "frames: 1393000, reward: 18.500000, loss: 0.000251, epsilon: 0.010000, episode:  695\n",
      "frames: 1394000, reward: 18.500000, loss: 0.000445, epsilon: 0.010000, episode:  695\n",
      "frames: 1395000, reward: 18.500000, loss: 0.000264, epsilon: 0.010000, episode:  695\n",
      "frames: 1396000, reward: 17.500000, loss: 0.000295, epsilon: 0.010000, episode:  696\n",
      "frames: 1397000, reward: 17.500000, loss: 0.000213, epsilon: 0.010000, episode:  697\n",
      "frames: 1398000, reward: 17.500000, loss: 0.000206, epsilon: 0.010000, episode:  697\n",
      "frames: 1399000, reward: 17.500000, loss: 0.000309, epsilon: 0.010000, episode:  697\n",
      "frames: 1400000, reward: 17.200000, loss: 0.001231, epsilon: 0.010000, episode:  698\n",
      "frames: 1401000, reward: 17.200000, loss: 0.000587, epsilon: 0.010000, episode:  698\n",
      "frames: 1402000, reward: 17.300000, loss: 0.000361, epsilon: 0.010000, episode:  699\n",
      "frames: 1403000, reward: 17.300000, loss: 0.000821, epsilon: 0.010000, episode:  699\n",
      "frames: 1404000, reward: 17.300000, loss: 0.000106, epsilon: 0.010000, episode:  700\n",
      "frames: 1405000, reward: 17.300000, loss: 0.000281, epsilon: 0.010000, episode:  700\n",
      "frames: 1406000, reward: 17.300000, loss: 0.000300, epsilon: 0.010000, episode:  700\n",
      "frames: 1407000, reward: 16.900000, loss: 0.000914, epsilon: 0.010000, episode:  701\n",
      "frames: 1408000, reward: 16.900000, loss: 0.003223, epsilon: 0.010000, episode:  701\n",
      "frames: 1409000, reward: 17.000000, loss: 0.000288, epsilon: 0.010000, episode:  702\n",
      "frames: 1410000, reward: 17.000000, loss: 0.000175, epsilon: 0.010000, episode:  702\n",
      "frames: 1411000, reward: 17.400000, loss: 0.000230, epsilon: 0.010000, episode:  703\n",
      "frames: 1412000, reward: 17.400000, loss: 0.000236, epsilon: 0.010000, episode:  703\n",
      "frames: 1413000, reward: 17.400000, loss: 0.000183, epsilon: 0.010000, episode:  704\n",
      "frames: 1414000, reward: 17.400000, loss: 0.000270, epsilon: 0.010000, episode:  704\n",
      "frames: 1415000, reward: 17.600000, loss: 0.000395, epsilon: 0.010000, episode:  705\n",
      "frames: 1416000, reward: 17.600000, loss: 0.000568, epsilon: 0.010000, episode:  705\n",
      "frames: 1417000, reward: 18.600000, loss: 0.000335, epsilon: 0.010000, episode:  706\n",
      "frames: 1418000, reward: 18.600000, loss: 0.000272, epsilon: 0.010000, episode:  706\n",
      "frames: 1419000, reward: 18.600000, loss: 0.000259, epsilon: 0.010000, episode:  706\n",
      "frames: 1420000, reward: 18.600000, loss: 0.000397, epsilon: 0.010000, episode:  706\n",
      "frames: 1421000, reward: 18.000000, loss: 0.001334, epsilon: 0.010000, episode:  707\n",
      "frames: 1422000, reward: 18.000000, loss: 0.000163, epsilon: 0.010000, episode:  707\n",
      "frames: 1423000, reward: 18.500000, loss: 0.000444, epsilon: 0.010000, episode:  708\n",
      "frames: 1424000, reward: 18.500000, loss: 0.000517, epsilon: 0.010000, episode:  708\n",
      "frames: 1425000, reward: 18.400000, loss: 0.000514, epsilon: 0.010000, episode:  709\n",
      "frames: 1426000, reward: 18.400000, loss: 0.000567, epsilon: 0.010000, episode:  709\n",
      "frames: 1427000, reward: 18.400000, loss: 0.000276, epsilon: 0.010000, episode:  709\n",
      "frames: 1428000, reward: 18.500000, loss: 0.000421, epsilon: 0.010000, episode:  710\n",
      "frames: 1429000, reward: 18.500000, loss: 0.000399, epsilon: 0.010000, episode:  710\n",
      "frames: 1430000, reward: 18.500000, loss: 0.000220, epsilon: 0.010000, episode:  711\n",
      "frames: 1431000, reward: 18.500000, loss: 0.000167, epsilon: 0.010000, episode:  711\n",
      "frames: 1432000, reward: 18.600000, loss: 0.000185, epsilon: 0.010000, episode:  712\n",
      "frames: 1433000, reward: 18.600000, loss: 0.001214, epsilon: 0.010000, episode:  712\n",
      "frames: 1434000, reward: 18.600000, loss: 0.000226, epsilon: 0.010000, episode:  712\n",
      "frames: 1435000, reward: 18.500000, loss: 0.000952, epsilon: 0.010000, episode:  713\n",
      "frames: 1436000, reward: 18.500000, loss: 0.000266, epsilon: 0.010000, episode:  713\n",
      "frames: 1437000, reward: 18.600000, loss: 0.000394, epsilon: 0.010000, episode:  714\n",
      "frames: 1438000, reward: 18.600000, loss: 0.000321, epsilon: 0.010000, episode:  714\n",
      "frames: 1439000, reward: 18.600000, loss: 0.002491, epsilon: 0.010000, episode:  715\n",
      "frames: 1440000, reward: 18.600000, loss: 0.000394, epsilon: 0.010000, episode:  715\n",
      "frames: 1441000, reward: 18.600000, loss: 0.000282, epsilon: 0.010000, episode:  715\n",
      "frames: 1442000, reward: 17.900000, loss: 0.000226, epsilon: 0.010000, episode:  716\n",
      "frames: 1443000, reward: 17.900000, loss: 0.000514, epsilon: 0.010000, episode:  716\n",
      "frames: 1444000, reward: 18.400000, loss: 0.000351, epsilon: 0.010000, episode:  717\n",
      "frames: 1445000, reward: 18.400000, loss: 0.000463, epsilon: 0.010000, episode:  717\n",
      "frames: 1446000, reward: 18.200000, loss: 0.000253, epsilon: 0.010000, episode:  718\n",
      "frames: 1447000, reward: 18.200000, loss: 0.000338, epsilon: 0.010000, episode:  718\n",
      "frames: 1448000, reward: 18.200000, loss: 0.001229, epsilon: 0.010000, episode:  718\n",
      "frames: 1449000, reward: 18.200000, loss: 0.000135, epsilon: 0.010000, episode:  719\n",
      "frames: 1450000, reward: 18.200000, loss: 0.000211, epsilon: 0.010000, episode:  719\n",
      "frames: 1451000, reward: 18.200000, loss: 0.000780, epsilon: 0.010000, episode:  719\n",
      "frames: 1452000, reward: 17.900000, loss: 0.000319, epsilon: 0.010000, episode:  720\n",
      "frames: 1453000, reward: 17.900000, loss: 0.000678, epsilon: 0.010000, episode:  720\n",
      "frames: 1454000, reward: 18.000000, loss: 0.001253, epsilon: 0.010000, episode:  721\n",
      "frames: 1455000, reward: 18.000000, loss: 0.000616, epsilon: 0.010000, episode:  721\n",
      "frames: 1456000, reward: 17.700000, loss: 0.000295, epsilon: 0.010000, episode:  722\n",
      "frames: 1457000, reward: 17.700000, loss: 0.000906, epsilon: 0.010000, episode:  722\n",
      "frames: 1458000, reward: 17.700000, loss: 0.000332, epsilon: 0.010000, episode:  722\n",
      "frames: 1459000, reward: 17.500000, loss: 0.004899, epsilon: 0.010000, episode:  723\n",
      "frames: 1460000, reward: 17.500000, loss: 0.000600, epsilon: 0.010000, episode:  723\n",
      "frames: 1461000, reward: 17.500000, loss: 0.002962, epsilon: 0.010000, episode:  723\n",
      "frames: 1462000, reward: 17.300000, loss: 0.000859, epsilon: 0.010000, episode:  724\n",
      "frames: 1463000, reward: 17.300000, loss: 0.000799, epsilon: 0.010000, episode:  724\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1464000, reward: 17.100000, loss: 0.000379, epsilon: 0.010000, episode:  725\n",
      "frames: 1465000, reward: 17.100000, loss: 0.000514, epsilon: 0.010000, episode:  725\n",
      "frames: 1466000, reward: 17.100000, loss: 0.000371, epsilon: 0.010000, episode:  725\n",
      "frames: 1467000, reward: 17.600000, loss: 0.000409, epsilon: 0.010000, episode:  726\n",
      "frames: 1468000, reward: 17.600000, loss: 0.000371, epsilon: 0.010000, episode:  726\n",
      "frames: 1469000, reward: 17.600000, loss: 0.000247, epsilon: 0.010000, episode:  727\n",
      "frames: 1470000, reward: 17.600000, loss: 0.000285, epsilon: 0.010000, episode:  727\n",
      "frames: 1471000, reward: 17.600000, loss: 0.000178, epsilon: 0.010000, episode:  727\n",
      "frames: 1472000, reward: 17.100000, loss: 0.000854, epsilon: 0.010000, episode:  728\n",
      "frames: 1473000, reward: 17.100000, loss: 0.000397, epsilon: 0.010000, episode:  728\n",
      "frames: 1474000, reward: 16.600000, loss: 0.000205, epsilon: 0.010000, episode:  729\n",
      "frames: 1475000, reward: 16.600000, loss: 0.000396, epsilon: 0.010000, episode:  729\n",
      "frames: 1476000, reward: 16.600000, loss: 0.000299, epsilon: 0.010000, episode:  730\n",
      "frames: 1477000, reward: 16.600000, loss: 0.000610, epsilon: 0.010000, episode:  730\n",
      "frames: 1478000, reward: 16.900000, loss: 0.000327, epsilon: 0.010000, episode:  731\n",
      "frames: 1479000, reward: 16.900000, loss: 0.000618, epsilon: 0.010000, episode:  731\n",
      "frames: 1480000, reward: 17.300000, loss: 0.000440, epsilon: 0.010000, episode:  732\n",
      "frames: 1481000, reward: 17.300000, loss: 0.000304, epsilon: 0.010000, episode:  732\n",
      "frames: 1482000, reward: 17.800000, loss: 0.001040, epsilon: 0.010000, episode:  733\n",
      "frames: 1483000, reward: 17.800000, loss: 0.000324, epsilon: 0.010000, episode:  733\n",
      "frames: 1484000, reward: 17.800000, loss: 0.000329, epsilon: 0.010000, episode:  733\n",
      "frames: 1485000, reward: 17.700000, loss: 0.000606, epsilon: 0.010000, episode:  734\n",
      "frames: 1486000, reward: 17.700000, loss: 0.001183, epsilon: 0.010000, episode:  734\n",
      "frames: 1487000, reward: 17.700000, loss: 0.000834, epsilon: 0.010000, episode:  734\n",
      "frames: 1488000, reward: 17.800000, loss: 0.000695, epsilon: 0.010000, episode:  735\n",
      "frames: 1489000, reward: 17.800000, loss: 0.000320, epsilon: 0.010000, episode:  735\n",
      "frames: 1490000, reward: 17.700000, loss: 0.000550, epsilon: 0.010000, episode:  736\n",
      "frames: 1491000, reward: 17.700000, loss: 0.000335, epsilon: 0.010000, episode:  736\n",
      "frames: 1492000, reward: 17.300000, loss: 0.000228, epsilon: 0.010000, episode:  737\n",
      "frames: 1493000, reward: 17.300000, loss: 0.000847, epsilon: 0.010000, episode:  737\n",
      "frames: 1494000, reward: 18.000000, loss: 0.000256, epsilon: 0.010000, episode:  738\n",
      "frames: 1495000, reward: 18.000000, loss: 0.000310, epsilon: 0.010000, episode:  738\n",
      "frames: 1496000, reward: 18.500000, loss: 0.000551, epsilon: 0.010000, episode:  739\n",
      "frames: 1497000, reward: 18.500000, loss: 0.000651, epsilon: 0.010000, episode:  739\n",
      "frames: 1498000, reward: 18.500000, loss: 0.000318, epsilon: 0.010000, episode:  739\n",
      "frames: 1499000, reward: 18.800000, loss: 0.001861, epsilon: 0.010000, episode:  740\n",
      "frames: 1500000, reward: 18.800000, loss: 0.000363, epsilon: 0.010000, episode:  740\n",
      "frames: 1501000, reward: 18.800000, loss: 0.000617, epsilon: 0.010000, episode:  740\n",
      "frames: 1502000, reward: 18.400000, loss: 0.000741, epsilon: 0.010000, episode:  741\n",
      "frames: 1503000, reward: 18.400000, loss: 0.000314, epsilon: 0.010000, episode:  741\n",
      "frames: 1504000, reward: 18.000000, loss: 0.000213, epsilon: 0.010000, episode:  742\n",
      "frames: 1505000, reward: 18.000000, loss: 0.000574, epsilon: 0.010000, episode:  742\n",
      "frames: 1506000, reward: 18.000000, loss: 0.000431, epsilon: 0.010000, episode:  742\n",
      "frames: 1507000, reward: 17.300000, loss: 0.000642, epsilon: 0.010000, episode:  743\n",
      "frames: 1508000, reward: 17.300000, loss: 0.000972, epsilon: 0.010000, episode:  743\n",
      "frames: 1509000, reward: 17.300000, loss: 0.000693, epsilon: 0.010000, episode:  743\n",
      "frames: 1510000, reward: 17.200000, loss: 0.000459, epsilon: 0.010000, episode:  744\n",
      "frames: 1511000, reward: 17.200000, loss: 0.000294, epsilon: 0.010000, episode:  744\n",
      "frames: 1512000, reward: 17.300000, loss: 0.000982, epsilon: 0.010000, episode:  745\n",
      "frames: 1513000, reward: 17.300000, loss: 0.000362, epsilon: 0.010000, episode:  745\n",
      "frames: 1514000, reward: 17.300000, loss: 0.000210, epsilon: 0.010000, episode:  745\n",
      "frames: 1515000, reward: 17.400000, loss: 0.000232, epsilon: 0.010000, episode:  746\n",
      "frames: 1516000, reward: 17.400000, loss: 0.000366, epsilon: 0.010000, episode:  746\n",
      "frames: 1517000, reward: 18.000000, loss: 0.003312, epsilon: 0.010000, episode:  747\n",
      "frames: 1518000, reward: 18.000000, loss: 0.000918, epsilon: 0.010000, episode:  747\n",
      "frames: 1519000, reward: 17.700000, loss: 0.000760, epsilon: 0.010000, episode:  748\n",
      "frames: 1520000, reward: 17.700000, loss: 0.000420, epsilon: 0.010000, episode:  748\n",
      "frames: 1521000, reward: 17.800000, loss: 0.000637, epsilon: 0.010000, episode:  749\n",
      "frames: 1522000, reward: 17.800000, loss: 0.000490, epsilon: 0.010000, episode:  749\n",
      "frames: 1523000, reward: 17.600000, loss: 0.000138, epsilon: 0.010000, episode:  750\n",
      "frames: 1524000, reward: 17.600000, loss: 0.000414, epsilon: 0.010000, episode:  750\n",
      "frames: 1525000, reward: 17.800000, loss: 0.000538, epsilon: 0.010000, episode:  751\n",
      "frames: 1526000, reward: 17.800000, loss: 0.000352, epsilon: 0.010000, episode:  751\n",
      "frames: 1527000, reward: 18.200000, loss: 0.000410, epsilon: 0.010000, episode:  752\n",
      "frames: 1528000, reward: 18.200000, loss: 0.001090, epsilon: 0.010000, episode:  752\n",
      "frames: 1529000, reward: 18.900000, loss: 0.007645, epsilon: 0.010000, episode:  753\n",
      "frames: 1530000, reward: 18.900000, loss: 0.000498, epsilon: 0.010000, episode:  753\n",
      "frames: 1531000, reward: 19.500000, loss: 0.000281, epsilon: 0.010000, episode:  754\n",
      "frames: 1532000, reward: 19.600000, loss: 0.000248, epsilon: 0.010000, episode:  755\n",
      "frames: 1533000, reward: 19.600000, loss: 0.000550, epsilon: 0.010000, episode:  755\n",
      "frames: 1534000, reward: 19.600000, loss: 0.000470, epsilon: 0.010000, episode:  755\n",
      "frames: 1535000, reward: 19.700000, loss: 0.000424, epsilon: 0.010000, episode:  756\n",
      "frames: 1536000, reward: 19.700000, loss: 0.000240, epsilon: 0.010000, episode:  756\n",
      "frames: 1537000, reward: 19.700000, loss: 0.000391, epsilon: 0.010000, episode:  757\n",
      "frames: 1538000, reward: 19.700000, loss: 0.000461, epsilon: 0.010000, episode:  757\n",
      "frames: 1539000, reward: 20.000000, loss: 0.000304, epsilon: 0.010000, episode:  758\n",
      "frames: 1540000, reward: 20.000000, loss: 0.000248, epsilon: 0.010000, episode:  758\n",
      "frames: 1541000, reward: 20.000000, loss: 0.000277, epsilon: 0.010000, episode:  758\n",
      "frames: 1542000, reward: 20.000000, loss: 0.000556, epsilon: 0.010000, episode:  759\n",
      "frames: 1543000, reward: 20.000000, loss: 0.000206, epsilon: 0.010000, episode:  759\n",
      "frames: 1544000, reward: 20.300000, loss: 0.000636, epsilon: 0.010000, episode:  760\n",
      "frames: 1545000, reward: 20.600000, loss: 0.000145, epsilon: 0.010000, episode:  761\n",
      "frames: 1546000, reward: 20.600000, loss: 0.000585, epsilon: 0.010000, episode:  761\n",
      "frames: 1547000, reward: 20.600000, loss: 0.000559, epsilon: 0.010000, episode:  761\n",
      "frames: 1548000, reward: 20.500000, loss: 0.000372, epsilon: 0.010000, episode:  762\n",
      "frames: 1549000, reward: 20.500000, loss: 0.000354, epsilon: 0.010000, episode:  762\n",
      "frames: 1550000, reward: 20.000000, loss: 0.000359, epsilon: 0.010000, episode:  763\n",
      "frames: 1551000, reward: 20.000000, loss: 0.000474, epsilon: 0.010000, episode:  763\n",
      "frames: 1552000, reward: 20.000000, loss: 0.000250, epsilon: 0.010000, episode:  764\n",
      "frames: 1553000, reward: 20.000000, loss: 0.000301, epsilon: 0.010000, episode:  764\n",
      "frames: 1554000, reward: 20.100000, loss: 0.000284, epsilon: 0.010000, episode:  765\n",
      "frames: 1555000, reward: 20.100000, loss: 0.000211, epsilon: 0.010000, episode:  765\n",
      "frames: 1556000, reward: 20.000000, loss: 0.000877, epsilon: 0.010000, episode:  766\n",
      "frames: 1557000, reward: 20.000000, loss: 0.000252, epsilon: 0.010000, episode:  766\n",
      "frames: 1558000, reward: 20.100000, loss: 0.000347, epsilon: 0.010000, episode:  767\n",
      "frames: 1559000, reward: 20.100000, loss: 0.000211, epsilon: 0.010000, episode:  767\n",
      "frames: 1560000, reward: 20.100000, loss: 0.000272, epsilon: 0.010000, episode:  768\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1561000, reward: 20.100000, loss: 0.000233, epsilon: 0.010000, episode:  768\n",
      "frames: 1562000, reward: 20.100000, loss: 0.000373, epsilon: 0.010000, episode:  768\n",
      "frames: 1563000, reward: 19.400000, loss: 0.000196, epsilon: 0.010000, episode:  769\n",
      "frames: 1564000, reward: 19.400000, loss: 0.000187, epsilon: 0.010000, episode:  770\n",
      "frames: 1565000, reward: 19.400000, loss: 0.000677, epsilon: 0.010000, episode:  770\n",
      "frames: 1566000, reward: 19.400000, loss: 0.000319, epsilon: 0.010000, episode:  770\n",
      "frames: 1567000, reward: 19.400000, loss: 0.000140, epsilon: 0.010000, episode:  771\n",
      "frames: 1568000, reward: 19.400000, loss: 0.000190, epsilon: 0.010000, episode:  771\n",
      "frames: 1569000, reward: 19.400000, loss: 0.000207, epsilon: 0.010000, episode:  772\n",
      "frames: 1570000, reward: 19.400000, loss: 0.000194, epsilon: 0.010000, episode:  772\n",
      "frames: 1571000, reward: 19.800000, loss: 0.000226, epsilon: 0.010000, episode:  773\n",
      "frames: 1572000, reward: 19.700000, loss: 0.000490, epsilon: 0.010000, episode:  774\n",
      "frames: 1573000, reward: 19.700000, loss: 0.000491, epsilon: 0.010000, episode:  774\n",
      "frames: 1574000, reward: 19.700000, loss: 0.000236, epsilon: 0.010000, episode:  774\n",
      "frames: 1575000, reward: 19.700000, loss: 0.000279, epsilon: 0.010000, episode:  775\n",
      "frames: 1576000, reward: 19.700000, loss: 0.000149, epsilon: 0.010000, episode:  775\n",
      "frames: 1577000, reward: 19.700000, loss: 0.000933, epsilon: 0.010000, episode:  775\n",
      "frames: 1578000, reward: 19.900000, loss: 0.000175, epsilon: 0.010000, episode:  776\n",
      "frames: 1579000, reward: 19.900000, loss: 0.000384, epsilon: 0.010000, episode:  776\n",
      "frames: 1580000, reward: 19.500000, loss: 0.000168, epsilon: 0.010000, episode:  777\n",
      "frames: 1581000, reward: 19.500000, loss: 0.000328, epsilon: 0.010000, episode:  777\n",
      "frames: 1582000, reward: 19.500000, loss: 0.000691, epsilon: 0.010000, episode:  778\n",
      "frames: 1583000, reward: 19.500000, loss: 0.000398, epsilon: 0.010000, episode:  778\n",
      "frames: 1584000, reward: 19.500000, loss: 0.000339, epsilon: 0.010000, episode:  778\n",
      "frames: 1585000, reward: 20.300000, loss: 0.000281, epsilon: 0.010000, episode:  779\n",
      "frames: 1586000, reward: 20.300000, loss: 0.000167, epsilon: 0.010000, episode:  779\n",
      "frames: 1587000, reward: 20.300000, loss: 0.000230, epsilon: 0.010000, episode:  779\n",
      "frames: 1588000, reward: 19.900000, loss: 0.000146, epsilon: 0.010000, episode:  780\n",
      "frames: 1589000, reward: 19.900000, loss: 0.000346, epsilon: 0.010000, episode:  780\n",
      "frames: 1590000, reward: 19.700000, loss: 0.000294, epsilon: 0.010000, episode:  781\n",
      "frames: 1591000, reward: 19.800000, loss: 0.000109, epsilon: 0.010000, episode:  782\n",
      "frames: 1592000, reward: 19.800000, loss: 0.000296, epsilon: 0.010000, episode:  782\n",
      "frames: 1593000, reward: 19.700000, loss: 0.000223, epsilon: 0.010000, episode:  783\n",
      "frames: 1594000, reward: 19.700000, loss: 0.000152, epsilon: 0.010000, episode:  783\n",
      "frames: 1595000, reward: 19.700000, loss: 0.000644, epsilon: 0.010000, episode:  783\n",
      "frames: 1596000, reward: 19.100000, loss: 0.000150, epsilon: 0.010000, episode:  784\n",
      "frames: 1597000, reward: 19.100000, loss: 0.000225, epsilon: 0.010000, episode:  784\n",
      "frames: 1598000, reward: 19.000000, loss: 0.000568, epsilon: 0.010000, episode:  785\n",
      "frames: 1599000, reward: 19.000000, loss: 0.000678, epsilon: 0.010000, episode:  785\n",
      "frames: 1600000, reward: 19.000000, loss: 0.000215, epsilon: 0.010000, episode:  785\n",
      "frames: 1601000, reward: 19.000000, loss: 0.000177, epsilon: 0.010000, episode:  786\n",
      "frames: 1602000, reward: 19.000000, loss: 0.000318, epsilon: 0.010000, episode:  786\n",
      "frames: 1603000, reward: 19.200000, loss: 0.000189, epsilon: 0.010000, episode:  787\n",
      "frames: 1604000, reward: 19.200000, loss: 0.000091, epsilon: 0.010000, episode:  787\n",
      "frames: 1605000, reward: 19.200000, loss: 0.000131, epsilon: 0.010000, episode:  787\n",
      "frames: 1606000, reward: 19.100000, loss: 0.000338, epsilon: 0.010000, episode:  788\n",
      "frames: 1607000, reward: 19.100000, loss: 0.000310, epsilon: 0.010000, episode:  788\n",
      "frames: 1608000, reward: 19.000000, loss: 0.000598, epsilon: 0.010000, episode:  789\n",
      "frames: 1609000, reward: 19.000000, loss: 0.000187, epsilon: 0.010000, episode:  789\n",
      "frames: 1610000, reward: 19.000000, loss: 0.000326, epsilon: 0.010000, episode:  789\n",
      "frames: 1611000, reward: 19.300000, loss: 0.000173, epsilon: 0.010000, episode:  790\n",
      "frames: 1612000, reward: 19.400000, loss: 0.000165, epsilon: 0.010000, episode:  791\n",
      "frames: 1613000, reward: 19.400000, loss: 0.000152, epsilon: 0.010000, episode:  791\n",
      "frames: 1614000, reward: 19.400000, loss: 0.000141, epsilon: 0.010000, episode:  791\n",
      "frames: 1615000, reward: 19.400000, loss: 0.001108, epsilon: 0.010000, episode:  792\n",
      "frames: 1616000, reward: 19.400000, loss: 0.000223, epsilon: 0.010000, episode:  792\n",
      "frames: 1617000, reward: 19.500000, loss: 0.000203, epsilon: 0.010000, episode:  793\n",
      "frames: 1618000, reward: 20.200000, loss: 0.000128, epsilon: 0.010000, episode:  794\n",
      "frames: 1619000, reward: 20.200000, loss: 0.000158, epsilon: 0.010000, episode:  794\n",
      "frames: 1620000, reward: 20.200000, loss: 0.001286, epsilon: 0.010000, episode:  794\n",
      "frames: 1621000, reward: 20.000000, loss: 0.000538, epsilon: 0.010000, episode:  795\n",
      "frames: 1622000, reward: 20.000000, loss: 0.000181, epsilon: 0.010000, episode:  795\n",
      "frames: 1623000, reward: 19.900000, loss: 0.000228, epsilon: 0.010000, episode:  796\n",
      "frames: 1624000, reward: 19.900000, loss: 0.000387, epsilon: 0.010000, episode:  796\n",
      "frames: 1625000, reward: 20.000000, loss: 0.000193, epsilon: 0.010000, episode:  797\n",
      "frames: 1626000, reward: 20.000000, loss: 0.000144, epsilon: 0.010000, episode:  797\n",
      "frames: 1627000, reward: 20.000000, loss: 0.000114, epsilon: 0.010000, episode:  797\n",
      "frames: 1628000, reward: 19.600000, loss: 0.000157, epsilon: 0.010000, episode:  798\n",
      "frames: 1629000, reward: 19.600000, loss: 0.000152, epsilon: 0.010000, episode:  798\n",
      "frames: 1630000, reward: 19.500000, loss: 0.000249, epsilon: 0.010000, episode:  799\n",
      "frames: 1631000, reward: 19.500000, loss: 0.000168, epsilon: 0.010000, episode:  799\n",
      "frames: 1632000, reward: 19.600000, loss: 0.000080, epsilon: 0.010000, episode:  800\n",
      "frames: 1633000, reward: 19.600000, loss: 0.000179, epsilon: 0.010000, episode:  800\n",
      "frames: 1634000, reward: 19.600000, loss: 0.000189, epsilon: 0.010000, episode:  801\n",
      "frames: 1635000, reward: 19.600000, loss: 0.000177, epsilon: 0.010000, episode:  801\n",
      "frames: 1636000, reward: 19.500000, loss: 0.002933, epsilon: 0.010000, episode:  802\n",
      "frames: 1637000, reward: 19.500000, loss: 0.000300, epsilon: 0.010000, episode:  802\n",
      "frames: 1638000, reward: 19.600000, loss: 0.000138, epsilon: 0.010000, episode:  803\n",
      "frames: 1639000, reward: 19.600000, loss: 0.000157, epsilon: 0.010000, episode:  803\n",
      "frames: 1640000, reward: 19.600000, loss: 0.000206, epsilon: 0.010000, episode:  804\n",
      "frames: 1641000, reward: 19.600000, loss: 0.000726, epsilon: 0.010000, episode:  804\n",
      "frames: 1642000, reward: 19.900000, loss: 0.000146, epsilon: 0.010000, episode:  805\n",
      "frames: 1643000, reward: 19.900000, loss: 0.000648, epsilon: 0.010000, episode:  805\n",
      "frames: 1644000, reward: 19.900000, loss: 0.000232, epsilon: 0.010000, episode:  805\n",
      "frames: 1645000, reward: 19.800000, loss: 0.001280, epsilon: 0.010000, episode:  806\n",
      "frames: 1646000, reward: 19.800000, loss: 0.000096, epsilon: 0.010000, episode:  806\n",
      "frames: 1647000, reward: 19.500000, loss: 0.000129, epsilon: 0.010000, episode:  807\n",
      "frames: 1648000, reward: 19.500000, loss: 0.000095, epsilon: 0.010000, episode:  807\n",
      "frames: 1649000, reward: 19.900000, loss: 0.000567, epsilon: 0.010000, episode:  808\n",
      "frames: 1650000, reward: 19.900000, loss: 0.000153, epsilon: 0.010000, episode:  808\n",
      "frames: 1651000, reward: 19.900000, loss: 0.000174, epsilon: 0.010000, episode:  808\n",
      "frames: 1652000, reward: 20.100000, loss: 0.000273, epsilon: 0.010000, episode:  809\n",
      "frames: 1653000, reward: 20.100000, loss: 0.000361, epsilon: 0.010000, episode:  809\n",
      "frames: 1654000, reward: 19.900000, loss: 0.000094, epsilon: 0.010000, episode:  810\n",
      "frames: 1655000, reward: 19.900000, loss: 0.000364, epsilon: 0.010000, episode:  810\n",
      "frames: 1656000, reward: 19.900000, loss: 0.000269, epsilon: 0.010000, episode:  811\n",
      "frames: 1657000, reward: 19.900000, loss: 0.000504, epsilon: 0.010000, episode:  811\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1658000, reward: 19.900000, loss: 0.000749, epsilon: 0.010000, episode:  811\n",
      "frames: 1659000, reward: 19.900000, loss: 0.001851, epsilon: 0.010000, episode:  812\n",
      "frames: 1660000, reward: 19.900000, loss: 0.000212, epsilon: 0.010000, episode:  812\n",
      "frames: 1661000, reward: 19.700000, loss: 0.000134, epsilon: 0.010000, episode:  813\n",
      "frames: 1662000, reward: 19.700000, loss: 0.000654, epsilon: 0.010000, episode:  813\n",
      "frames: 1663000, reward: 19.500000, loss: 0.000220, epsilon: 0.010000, episode:  814\n",
      "frames: 1664000, reward: 19.500000, loss: 0.000125, epsilon: 0.010000, episode:  814\n",
      "frames: 1665000, reward: 19.400000, loss: 0.001163, epsilon: 0.010000, episode:  815\n",
      "frames: 1666000, reward: 19.500000, loss: 0.000111, epsilon: 0.010000, episode:  816\n",
      "frames: 1667000, reward: 19.500000, loss: 0.000226, epsilon: 0.010000, episode:  816\n",
      "frames: 1668000, reward: 19.500000, loss: 0.000221, epsilon: 0.010000, episode:  816\n",
      "frames: 1669000, reward: 19.300000, loss: 0.000174, epsilon: 0.010000, episode:  817\n",
      "frames: 1670000, reward: 19.300000, loss: 0.000160, epsilon: 0.010000, episode:  817\n",
      "frames: 1671000, reward: 19.300000, loss: 0.000175, epsilon: 0.010000, episode:  818\n",
      "frames: 1672000, reward: 19.300000, loss: 0.000597, epsilon: 0.010000, episode:  818\n",
      "frames: 1673000, reward: 19.300000, loss: 0.000291, epsilon: 0.010000, episode:  818\n",
      "frames: 1674000, reward: 19.300000, loss: 0.000693, epsilon: 0.010000, episode:  819\n",
      "frames: 1675000, reward: 19.300000, loss: 0.000373, epsilon: 0.010000, episode:  819\n",
      "frames: 1676000, reward: 19.300000, loss: 0.000180, epsilon: 0.010000, episode:  820\n",
      "frames: 1677000, reward: 19.300000, loss: 0.000145, epsilon: 0.010000, episode:  820\n",
      "frames: 1678000, reward: 19.300000, loss: 0.001180, epsilon: 0.010000, episode:  821\n",
      "frames: 1679000, reward: 19.300000, loss: 0.000118, epsilon: 0.010000, episode:  821\n",
      "frames: 1680000, reward: 19.300000, loss: 0.000124, epsilon: 0.010000, episode:  822\n",
      "frames: 1681000, reward: 19.300000, loss: 0.000600, epsilon: 0.010000, episode:  822\n",
      "frames: 1682000, reward: 19.400000, loss: 0.002195, epsilon: 0.010000, episode:  823\n",
      "frames: 1683000, reward: 19.400000, loss: 0.000224, epsilon: 0.010000, episode:  823\n",
      "frames: 1684000, reward: 19.400000, loss: 0.000120, epsilon: 0.010000, episode:  823\n",
      "frames: 1685000, reward: 19.400000, loss: 0.000808, epsilon: 0.010000, episode:  824\n",
      "frames: 1686000, reward: 19.400000, loss: 0.000168, epsilon: 0.010000, episode:  824\n",
      "frames: 1687000, reward: 19.300000, loss: 0.000220, epsilon: 0.010000, episode:  825\n",
      "frames: 1688000, reward: 19.300000, loss: 0.000201, epsilon: 0.010000, episode:  825\n",
      "frames: 1689000, reward: 19.300000, loss: 0.000111, epsilon: 0.010000, episode:  826\n",
      "frames: 1690000, reward: 19.300000, loss: 0.000087, epsilon: 0.010000, episode:  826\n",
      "frames: 1691000, reward: 19.700000, loss: 0.000177, epsilon: 0.010000, episode:  827\n",
      "frames: 1692000, reward: 19.700000, loss: 0.000138, epsilon: 0.010000, episode:  827\n",
      "frames: 1693000, reward: 19.500000, loss: 0.000135, epsilon: 0.010000, episode:  828\n",
      "frames: 1694000, reward: 19.500000, loss: 0.001127, epsilon: 0.010000, episode:  828\n",
      "frames: 1695000, reward: 19.500000, loss: 0.000261, epsilon: 0.010000, episode:  828\n",
      "frames: 1696000, reward: 19.200000, loss: 0.000407, epsilon: 0.010000, episode:  829\n",
      "frames: 1697000, reward: 19.200000, loss: 0.000294, epsilon: 0.010000, episode:  829\n",
      "frames: 1698000, reward: 19.300000, loss: 0.000218, epsilon: 0.010000, episode:  830\n",
      "frames: 1699000, reward: 19.300000, loss: 0.000300, epsilon: 0.010000, episode:  830\n",
      "frames: 1700000, reward: 19.200000, loss: 0.000227, epsilon: 0.010000, episode:  831\n",
      "frames: 1701000, reward: 19.200000, loss: 0.000239, epsilon: 0.010000, episode:  831\n",
      "frames: 1702000, reward: 19.300000, loss: 0.000220, epsilon: 0.010000, episode:  832\n",
      "frames: 1703000, reward: 19.300000, loss: 0.000723, epsilon: 0.010000, episode:  832\n",
      "frames: 1704000, reward: 19.400000, loss: 0.000268, epsilon: 0.010000, episode:  833\n",
      "frames: 1705000, reward: 19.400000, loss: 0.001675, epsilon: 0.010000, episode:  833\n",
      "frames: 1706000, reward: 19.400000, loss: 0.000211, epsilon: 0.010000, episode:  833\n",
      "frames: 1707000, reward: 19.300000, loss: 0.000285, epsilon: 0.010000, episode:  834\n",
      "frames: 1708000, reward: 19.500000, loss: 0.000270, epsilon: 0.010000, episode:  835\n",
      "frames: 1709000, reward: 19.500000, loss: 0.000479, epsilon: 0.010000, episode:  835\n",
      "frames: 1710000, reward: 19.500000, loss: 0.000136, epsilon: 0.010000, episode:  836\n",
      "frames: 1711000, reward: 19.500000, loss: 0.000574, epsilon: 0.010000, episode:  836\n",
      "frames: 1712000, reward: 19.500000, loss: 0.000371, epsilon: 0.010000, episode:  836\n",
      "frames: 1713000, reward: 19.700000, loss: 0.000363, epsilon: 0.010000, episode:  837\n",
      "frames: 1714000, reward: 19.700000, loss: 0.001598, epsilon: 0.010000, episode:  837\n",
      "frames: 1715000, reward: 19.800000, loss: 0.000164, epsilon: 0.010000, episode:  838\n",
      "frames: 1716000, reward: 19.800000, loss: 0.000214, epsilon: 0.010000, episode:  838\n",
      "frames: 1717000, reward: 19.900000, loss: 0.000156, epsilon: 0.010000, episode:  839\n",
      "frames: 1718000, reward: 19.900000, loss: 0.000265, epsilon: 0.010000, episode:  839\n",
      "frames: 1719000, reward: 19.900000, loss: 0.000124, epsilon: 0.010000, episode:  839\n",
      "frames: 1720000, reward: 19.900000, loss: 0.000131, epsilon: 0.010000, episode:  840\n",
      "frames: 1721000, reward: 19.900000, loss: 0.000549, epsilon: 0.010000, episode:  840\n",
      "frames: 1722000, reward: 20.000000, loss: 0.000110, epsilon: 0.010000, episode:  841\n",
      "frames: 1723000, reward: 20.000000, loss: 0.000279, epsilon: 0.010000, episode:  841\n",
      "frames: 1724000, reward: 19.900000, loss: 0.000156, epsilon: 0.010000, episode:  842\n",
      "frames: 1725000, reward: 19.900000, loss: 0.000157, epsilon: 0.010000, episode:  842\n",
      "frames: 1726000, reward: 19.900000, loss: 0.000270, epsilon: 0.010000, episode:  843\n",
      "frames: 1727000, reward: 19.900000, loss: 0.000196, epsilon: 0.010000, episode:  843\n",
      "frames: 1728000, reward: 19.900000, loss: 0.000134, epsilon: 0.010000, episode:  844\n",
      "frames: 1729000, reward: 19.900000, loss: 0.000168, epsilon: 0.010000, episode:  844\n",
      "frames: 1730000, reward: 19.800000, loss: 0.000380, epsilon: 0.010000, episode:  845\n",
      "frames: 1731000, reward: 19.800000, loss: 0.000284, epsilon: 0.010000, episode:  845\n",
      "frames: 1732000, reward: 19.800000, loss: 0.000210, epsilon: 0.010000, episode:  846\n",
      "frames: 1733000, reward: 19.800000, loss: 0.000365, epsilon: 0.010000, episode:  846\n",
      "frames: 1734000, reward: 19.800000, loss: 0.000204, epsilon: 0.010000, episode:  846\n",
      "frames: 1735000, reward: 19.800000, loss: 0.000172, epsilon: 0.010000, episode:  847\n",
      "frames: 1736000, reward: 19.800000, loss: 0.000115, epsilon: 0.010000, episode:  847\n",
      "frames: 1737000, reward: 20.000000, loss: 0.000204, epsilon: 0.010000, episode:  848\n",
      "frames: 1738000, reward: 20.000000, loss: 0.000781, epsilon: 0.010000, episode:  848\n",
      "frames: 1739000, reward: 20.200000, loss: 0.000163, epsilon: 0.010000, episode:  849\n",
      "frames: 1740000, reward: 20.200000, loss: 0.000155, epsilon: 0.010000, episode:  849\n",
      "frames: 1741000, reward: 20.300000, loss: 0.000217, epsilon: 0.010000, episode:  850\n",
      "frames: 1742000, reward: 20.300000, loss: 0.000320, epsilon: 0.010000, episode:  850\n",
      "frames: 1743000, reward: 20.300000, loss: 0.000190, epsilon: 0.010000, episode:  851\n",
      "frames: 1744000, reward: 20.300000, loss: 0.000246, epsilon: 0.010000, episode:  851\n",
      "frames: 1745000, reward: 20.300000, loss: 0.000102, epsilon: 0.010000, episode:  851\n",
      "frames: 1746000, reward: 20.300000, loss: 0.000143, epsilon: 0.010000, episode:  852\n",
      "frames: 1747000, reward: 20.300000, loss: 0.000134, epsilon: 0.010000, episode:  852\n",
      "frames: 1748000, reward: 20.200000, loss: 0.000063, epsilon: 0.010000, episode:  853\n",
      "frames: 1749000, reward: 20.200000, loss: 0.000125, epsilon: 0.010000, episode:  853\n",
      "frames: 1750000, reward: 20.400000, loss: 0.000100, epsilon: 0.010000, episode:  854\n",
      "frames: 1751000, reward: 20.400000, loss: 0.000173, epsilon: 0.010000, episode:  854\n",
      "frames: 1752000, reward: 20.400000, loss: 0.000105, epsilon: 0.010000, episode:  855\n",
      "frames: 1753000, reward: 20.400000, loss: 0.000101, epsilon: 0.010000, episode:  855\n",
      "frames: 1754000, reward: 20.500000, loss: 0.000171, epsilon: 0.010000, episode:  856\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1755000, reward: 20.500000, loss: 0.000174, epsilon: 0.010000, episode:  856\n",
      "frames: 1756000, reward: 20.500000, loss: 0.000217, epsilon: 0.010000, episode:  857\n",
      "frames: 1757000, reward: 20.300000, loss: 0.000096, epsilon: 0.010000, episode:  858\n",
      "frames: 1758000, reward: 20.300000, loss: 0.000125, epsilon: 0.010000, episode:  858\n",
      "frames: 1759000, reward: 20.300000, loss: 0.000130, epsilon: 0.010000, episode:  858\n",
      "frames: 1760000, reward: 20.200000, loss: 0.000112, epsilon: 0.010000, episode:  859\n",
      "frames: 1761000, reward: 20.200000, loss: 0.000069, epsilon: 0.010000, episode:  859\n",
      "frames: 1762000, reward: 19.900000, loss: 0.000297, epsilon: 0.010000, episode:  860\n",
      "frames: 1763000, reward: 19.900000, loss: 0.000266, epsilon: 0.010000, episode:  860\n",
      "frames: 1764000, reward: 20.000000, loss: 0.000110, epsilon: 0.010000, episode:  861\n",
      "frames: 1765000, reward: 20.000000, loss: 0.000083, epsilon: 0.010000, episode:  861\n",
      "frames: 1766000, reward: 20.000000, loss: 0.000132, epsilon: 0.010000, episode:  862\n",
      "frames: 1767000, reward: 20.000000, loss: 0.000117, epsilon: 0.010000, episode:  862\n",
      "frames: 1768000, reward: 20.100000, loss: 0.000129, epsilon: 0.010000, episode:  863\n",
      "frames: 1769000, reward: 20.100000, loss: 0.000124, epsilon: 0.010000, episode:  863\n",
      "frames: 1770000, reward: 20.100000, loss: 0.000168, epsilon: 0.010000, episode:  864\n",
      "frames: 1771000, reward: 20.100000, loss: 0.000127, epsilon: 0.010000, episode:  864\n",
      "frames: 1772000, reward: 20.000000, loss: 0.000109, epsilon: 0.010000, episode:  865\n",
      "frames: 1773000, reward: 20.000000, loss: 0.000122, epsilon: 0.010000, episode:  865\n",
      "frames: 1774000, reward: 19.600000, loss: 0.000246, epsilon: 0.010000, episode:  866\n",
      "frames: 1775000, reward: 19.600000, loss: 0.000175, epsilon: 0.010000, episode:  866\n",
      "frames: 1776000, reward: 19.600000, loss: 0.000127, epsilon: 0.010000, episode:  867\n",
      "frames: 1777000, reward: 19.600000, loss: 0.000274, epsilon: 0.010000, episode:  867\n",
      "frames: 1778000, reward: 19.500000, loss: 0.000189, epsilon: 0.010000, episode:  868\n",
      "frames: 1779000, reward: 19.500000, loss: 0.000127, epsilon: 0.010000, episode:  868\n",
      "frames: 1780000, reward: 19.500000, loss: 0.000159, epsilon: 0.010000, episode:  868\n",
      "frames: 1781000, reward: 19.500000, loss: 0.000200, epsilon: 0.010000, episode:  869\n",
      "frames: 1782000, reward: 19.500000, loss: 0.000147, epsilon: 0.010000, episode:  869\n",
      "frames: 1783000, reward: 19.800000, loss: 0.000092, epsilon: 0.010000, episode:  870\n",
      "frames: 1784000, reward: 19.800000, loss: 0.000835, epsilon: 0.010000, episode:  870\n",
      "frames: 1785000, reward: 19.500000, loss: 0.000130, epsilon: 0.010000, episode:  871\n",
      "frames: 1786000, reward: 19.500000, loss: 0.000094, epsilon: 0.010000, episode:  871\n",
      "frames: 1787000, reward: 19.600000, loss: 0.000543, epsilon: 0.010000, episode:  872\n",
      "frames: 1788000, reward: 19.600000, loss: 0.000161, epsilon: 0.010000, episode:  872\n",
      "frames: 1789000, reward: 19.500000, loss: 0.000104, epsilon: 0.010000, episode:  873\n",
      "frames: 1790000, reward: 19.500000, loss: 0.000156, epsilon: 0.010000, episode:  873\n",
      "frames: 1791000, reward: 19.400000, loss: 0.000160, epsilon: 0.010000, episode:  874\n",
      "frames: 1792000, reward: 19.400000, loss: 0.000091, epsilon: 0.010000, episode:  874\n",
      "frames: 1793000, reward: 19.500000, loss: 0.000163, epsilon: 0.010000, episode:  875\n",
      "frames: 1794000, reward: 19.500000, loss: 0.000226, epsilon: 0.010000, episode:  875\n",
      "frames: 1795000, reward: 19.900000, loss: 0.000156, epsilon: 0.010000, episode:  876\n",
      "frames: 1796000, reward: 19.900000, loss: 0.000078, epsilon: 0.010000, episode:  876\n",
      "frames: 1797000, reward: 19.900000, loss: 0.000090, epsilon: 0.010000, episode:  877\n",
      "frames: 1798000, reward: 19.900000, loss: 0.000052, epsilon: 0.010000, episode:  877\n",
      "frames: 1799000, reward: 20.200000, loss: 0.000727, epsilon: 0.010000, episode:  878\n",
      "frames: 1800000, reward: 20.200000, loss: 0.000296, epsilon: 0.010000, episode:  878\n",
      "frames: 1801000, reward: 20.300000, loss: 0.000124, epsilon: 0.010000, episode:  879\n",
      "frames: 1802000, reward: 20.300000, loss: 0.000058, epsilon: 0.010000, episode:  879\n",
      "frames: 1803000, reward: 20.300000, loss: 0.000172, epsilon: 0.010000, episode:  879\n",
      "frames: 1804000, reward: 20.300000, loss: 0.000156, epsilon: 0.010000, episode:  880\n",
      "frames: 1805000, reward: 20.300000, loss: 0.000129, epsilon: 0.010000, episode:  880\n",
      "frames: 1806000, reward: 20.600000, loss: 0.000241, epsilon: 0.010000, episode:  881\n",
      "frames: 1807000, reward: 20.600000, loss: 0.000130, epsilon: 0.010000, episode:  881\n",
      "frames: 1808000, reward: 20.600000, loss: 0.000169, epsilon: 0.010000, episode:  881\n",
      "frames: 1809000, reward: 20.600000, loss: 0.000156, epsilon: 0.010000, episode:  882\n",
      "frames: 1810000, reward: 20.600000, loss: 0.000119, epsilon: 0.010000, episode:  882\n",
      "frames: 1811000, reward: 20.700000, loss: 0.000083, epsilon: 0.010000, episode:  883\n",
      "frames: 1812000, reward: 20.700000, loss: 0.000157, epsilon: 0.010000, episode:  883\n",
      "frames: 1813000, reward: 20.900000, loss: 0.000195, epsilon: 0.010000, episode:  884\n",
      "frames: 1814000, reward: 21.000000, loss: 0.000111, epsilon: 0.010000, episode:  885\n",
      "frames: 1815000, reward: 21.000000, loss: 0.000228, epsilon: 0.010000, episode:  885\n",
      "frames: 1816000, reward: 21.000000, loss: 0.000096, epsilon: 0.010000, episode:  885\n",
      "frames: 1817000, reward: 21.000000, loss: 0.000080, epsilon: 0.010000, episode:  886\n",
      "frames: 1818000, reward: 21.000000, loss: 0.000649, epsilon: 0.010000, episode:  886\n",
      "frames: 1819000, reward: 20.900000, loss: 0.000066, epsilon: 0.010000, episode:  887\n",
      "frames: 1820000, reward: 20.900000, loss: 0.000077, epsilon: 0.010000, episode:  887\n",
      "frames: 1821000, reward: 20.900000, loss: 0.000108, epsilon: 0.010000, episode:  888\n",
      "frames: 1822000, reward: 20.900000, loss: 0.000174, epsilon: 0.010000, episode:  888\n",
      "frames: 1823000, reward: 20.900000, loss: 0.000051, epsilon: 0.010000, episode:  888\n",
      "frames: 1824000, reward: 20.700000, loss: 0.000214, epsilon: 0.010000, episode:  889\n",
      "frames: 1825000, reward: 20.700000, loss: 0.000106, epsilon: 0.010000, episode:  889\n",
      "frames: 1826000, reward: 20.400000, loss: 0.000136, epsilon: 0.010000, episode:  890\n",
      "frames: 1827000, reward: 20.400000, loss: 0.000234, epsilon: 0.010000, episode:  890\n",
      "frames: 1828000, reward: 20.300000, loss: 0.000101, epsilon: 0.010000, episode:  891\n",
      "frames: 1829000, reward: 20.300000, loss: 0.000192, epsilon: 0.010000, episode:  891\n",
      "frames: 1830000, reward: 20.300000, loss: 0.000065, epsilon: 0.010000, episode:  891\n",
      "frames: 1831000, reward: 20.000000, loss: 0.000054, epsilon: 0.010000, episode:  892\n",
      "frames: 1832000, reward: 19.800000, loss: 0.000088, epsilon: 0.010000, episode:  893\n",
      "frames: 1833000, reward: 19.800000, loss: 0.000084, epsilon: 0.010000, episode:  893\n",
      "frames: 1834000, reward: 19.800000, loss: 0.000139, epsilon: 0.010000, episode:  893\n",
      "frames: 1835000, reward: 19.800000, loss: 0.000111, epsilon: 0.010000, episode:  894\n",
      "frames: 1836000, reward: 19.800000, loss: 0.000095, epsilon: 0.010000, episode:  894\n",
      "frames: 1837000, reward: 19.800000, loss: 0.000169, epsilon: 0.010000, episode:  895\n",
      "frames: 1838000, reward: 19.800000, loss: 0.000225, epsilon: 0.010000, episode:  895\n",
      "frames: 1839000, reward: 19.800000, loss: 0.000097, epsilon: 0.010000, episode:  895\n",
      "frames: 1840000, reward: 19.800000, loss: 0.000066, epsilon: 0.010000, episode:  896\n",
      "frames: 1841000, reward: 19.800000, loss: 0.000084, epsilon: 0.010000, episode:  896\n",
      "frames: 1842000, reward: 19.800000, loss: 0.000068, epsilon: 0.010000, episode:  896\n",
      "frames: 1843000, reward: 19.900000, loss: 0.000139, epsilon: 0.010000, episode:  897\n",
      "frames: 1844000, reward: 19.900000, loss: 0.000166, epsilon: 0.010000, episode:  897\n",
      "frames: 1845000, reward: 19.800000, loss: 0.000446, epsilon: 0.010000, episode:  898\n",
      "frames: 1846000, reward: 19.800000, loss: 0.000711, epsilon: 0.010000, episode:  898\n",
      "frames: 1847000, reward: 19.800000, loss: 0.000089, epsilon: 0.010000, episode:  898\n",
      "frames: 1848000, reward: 19.400000, loss: 0.000153, epsilon: 0.010000, episode:  899\n",
      "frames: 1849000, reward: 19.400000, loss: 0.000771, epsilon: 0.010000, episode:  899\n",
      "frames: 1850000, reward: 19.700000, loss: 0.000288, epsilon: 0.010000, episode:  900\n",
      "frames: 1851000, reward: 19.700000, loss: 0.000138, epsilon: 0.010000, episode:  900\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1852000, reward: 19.800000, loss: 0.000099, epsilon: 0.010000, episode:  901\n",
      "frames: 1853000, reward: 19.800000, loss: 0.000099, epsilon: 0.010000, episode:  901\n",
      "frames: 1854000, reward: 19.800000, loss: 0.000103, epsilon: 0.010000, episode:  901\n",
      "frames: 1855000, reward: 19.800000, loss: 0.000130, epsilon: 0.010000, episode:  902\n",
      "frames: 1856000, reward: 19.800000, loss: 0.000118, epsilon: 0.010000, episode:  902\n",
      "frames: 1857000, reward: 19.800000, loss: 0.000188, epsilon: 0.010000, episode:  902\n",
      "frames: 1858000, reward: 20.000000, loss: 0.000055, epsilon: 0.010000, episode:  903\n",
      "frames: 1859000, reward: 20.000000, loss: 0.000995, epsilon: 0.010000, episode:  904\n",
      "frames: 1860000, reward: 20.000000, loss: 0.000101, epsilon: 0.010000, episode:  904\n",
      "frames: 1861000, reward: 19.900000, loss: 0.000101, epsilon: 0.010000, episode:  905\n",
      "frames: 1862000, reward: 19.900000, loss: 0.000167, epsilon: 0.010000, episode:  905\n",
      "frames: 1863000, reward: 19.800000, loss: 0.000090, epsilon: 0.010000, episode:  906\n",
      "frames: 1864000, reward: 19.800000, loss: 0.000061, epsilon: 0.010000, episode:  906\n",
      "frames: 1865000, reward: 19.700000, loss: 0.000162, epsilon: 0.010000, episode:  907\n",
      "frames: 1866000, reward: 19.700000, loss: 0.000084, epsilon: 0.010000, episode:  907\n",
      "frames: 1867000, reward: 19.700000, loss: 0.000092, epsilon: 0.010000, episode:  907\n",
      "frames: 1868000, reward: 19.800000, loss: 0.000159, epsilon: 0.010000, episode:  908\n",
      "frames: 1869000, reward: 19.800000, loss: 0.000271, epsilon: 0.010000, episode:  908\n",
      "frames: 1870000, reward: 20.400000, loss: 0.000136, epsilon: 0.010000, episode:  909\n",
      "frames: 1871000, reward: 20.400000, loss: 0.000180, epsilon: 0.010000, episode:  909\n",
      "frames: 1872000, reward: 20.400000, loss: 0.000175, epsilon: 0.010000, episode:  909\n",
      "frames: 1873000, reward: 20.300000, loss: 0.000136, epsilon: 0.010000, episode:  910\n",
      "frames: 1874000, reward: 20.300000, loss: 0.000091, epsilon: 0.010000, episode:  910\n",
      "frames: 1875000, reward: 20.200000, loss: 0.000225, epsilon: 0.010000, episode:  911\n",
      "frames: 1876000, reward: 20.200000, loss: 0.000105, epsilon: 0.010000, episode:  911\n",
      "frames: 1877000, reward: 20.200000, loss: 0.000097, epsilon: 0.010000, episode:  911\n",
      "frames: 1878000, reward: 20.200000, loss: 0.000228, epsilon: 0.010000, episode:  912\n",
      "frames: 1879000, reward: 20.200000, loss: 0.000368, epsilon: 0.010000, episode:  912\n",
      "frames: 1880000, reward: 20.000000, loss: 0.000137, epsilon: 0.010000, episode:  913\n",
      "frames: 1881000, reward: 20.000000, loss: 0.000081, epsilon: 0.010000, episode:  913\n",
      "frames: 1882000, reward: 19.900000, loss: 0.000097, epsilon: 0.010000, episode:  914\n",
      "frames: 1883000, reward: 19.900000, loss: 0.000110, epsilon: 0.010000, episode:  914\n",
      "frames: 1884000, reward: 20.000000, loss: 0.000093, epsilon: 0.010000, episode:  915\n",
      "frames: 1885000, reward: 20.000000, loss: 0.000097, epsilon: 0.010000, episode:  915\n",
      "frames: 1886000, reward: 19.900000, loss: 0.000200, epsilon: 0.010000, episode:  916\n",
      "frames: 1887000, reward: 19.900000, loss: 0.000244, epsilon: 0.010000, episode:  916\n",
      "frames: 1888000, reward: 19.900000, loss: 0.000307, epsilon: 0.010000, episode:  916\n",
      "frames: 1889000, reward: 19.800000, loss: 0.000190, epsilon: 0.010000, episode:  917\n",
      "frames: 1890000, reward: 19.800000, loss: 0.000333, epsilon: 0.010000, episode:  918\n",
      "frames: 1891000, reward: 19.800000, loss: 0.000177, epsilon: 0.010000, episode:  918\n",
      "frames: 1892000, reward: 19.600000, loss: 0.000263, epsilon: 0.010000, episode:  919\n",
      "frames: 1893000, reward: 19.600000, loss: 0.000094, epsilon: 0.010000, episode:  919\n",
      "frames: 1894000, reward: 19.600000, loss: 0.001053, epsilon: 0.010000, episode:  920\n",
      "frames: 1895000, reward: 19.600000, loss: 0.000077, epsilon: 0.010000, episode:  920\n",
      "frames: 1896000, reward: 19.500000, loss: 0.000092, epsilon: 0.010000, episode:  921\n",
      "frames: 1897000, reward: 19.500000, loss: 0.000135, epsilon: 0.010000, episode:  921\n",
      "frames: 1898000, reward: 19.800000, loss: 0.000095, epsilon: 0.010000, episode:  922\n",
      "frames: 1899000, reward: 19.800000, loss: 0.000579, epsilon: 0.010000, episode:  922\n",
      "frames: 1900000, reward: 20.000000, loss: 0.000082, epsilon: 0.010000, episode:  923\n",
      "frames: 1901000, reward: 20.000000, loss: 0.000111, epsilon: 0.010000, episode:  923\n",
      "frames: 1902000, reward: 20.000000, loss: 0.000117, epsilon: 0.010000, episode:  923\n",
      "frames: 1903000, reward: 20.100000, loss: 0.000169, epsilon: 0.010000, episode:  924\n",
      "frames: 1904000, reward: 20.100000, loss: 0.000284, epsilon: 0.010000, episode:  924\n",
      "frames: 1905000, reward: 20.100000, loss: 0.000158, epsilon: 0.010000, episode:  925\n",
      "frames: 1906000, reward: 20.100000, loss: 0.000067, epsilon: 0.010000, episode:  925\n",
      "frames: 1907000, reward: 20.300000, loss: 0.000134, epsilon: 0.010000, episode:  926\n",
      "frames: 1908000, reward: 20.300000, loss: 0.000112, epsilon: 0.010000, episode:  926\n",
      "frames: 1909000, reward: 20.100000, loss: 0.000115, epsilon: 0.010000, episode:  927\n",
      "frames: 1910000, reward: 20.100000, loss: 0.000157, epsilon: 0.010000, episode:  927\n",
      "frames: 1911000, reward: 19.900000, loss: 0.000194, epsilon: 0.010000, episode:  928\n",
      "frames: 1912000, reward: 19.900000, loss: 0.000062, epsilon: 0.010000, episode:  928\n",
      "frames: 1913000, reward: 20.000000, loss: 0.000069, epsilon: 0.010000, episode:  929\n",
      "frames: 1914000, reward: 20.000000, loss: 0.000445, epsilon: 0.010000, episode:  929\n",
      "frames: 1915000, reward: 20.100000, loss: 0.005514, epsilon: 0.010000, episode:  930\n",
      "frames: 1916000, reward: 20.100000, loss: 0.000048, epsilon: 0.010000, episode:  930\n",
      "frames: 1917000, reward: 20.100000, loss: 0.000223, epsilon: 0.010000, episode:  930\n",
      "frames: 1918000, reward: 20.100000, loss: 0.000150, epsilon: 0.010000, episode:  931\n",
      "frames: 1919000, reward: 20.100000, loss: 0.000081, epsilon: 0.010000, episode:  932\n",
      "frames: 1920000, reward: 20.100000, loss: 0.000684, epsilon: 0.010000, episode:  932\n",
      "frames: 1921000, reward: 20.100000, loss: 0.000350, epsilon: 0.010000, episode:  933\n",
      "frames: 1922000, reward: 20.100000, loss: 0.000064, epsilon: 0.010000, episode:  933\n",
      "frames: 1923000, reward: 19.900000, loss: 0.000062, epsilon: 0.010000, episode:  934\n",
      "frames: 1924000, reward: 19.900000, loss: 0.000082, epsilon: 0.010000, episode:  934\n",
      "frames: 1925000, reward: 19.900000, loss: 0.000069, epsilon: 0.010000, episode:  935\n",
      "frames: 1926000, reward: 19.900000, loss: 0.000129, epsilon: 0.010000, episode:  935\n",
      "frames: 1927000, reward: 19.900000, loss: 0.000108, epsilon: 0.010000, episode:  936\n",
      "frames: 1928000, reward: 19.900000, loss: 0.000085, epsilon: 0.010000, episode:  936\n",
      "frames: 1929000, reward: 19.900000, loss: 0.001426, epsilon: 0.010000, episode:  936\n",
      "frames: 1930000, reward: 20.300000, loss: 0.000150, epsilon: 0.010000, episode:  937\n",
      "frames: 1931000, reward: 20.500000, loss: 0.000104, epsilon: 0.010000, episode:  938\n",
      "frames: 1932000, reward: 20.500000, loss: 0.000078, epsilon: 0.010000, episode:  938\n",
      "frames: 1933000, reward: 20.600000, loss: 0.000078, epsilon: 0.010000, episode:  939\n",
      "frames: 1934000, reward: 20.600000, loss: 0.000155, epsilon: 0.010000, episode:  939\n",
      "frames: 1935000, reward: 20.400000, loss: 0.000153, epsilon: 0.010000, episode:  940\n",
      "frames: 1936000, reward: 20.600000, loss: 0.000185, epsilon: 0.010000, episode:  941\n",
      "frames: 1937000, reward: 20.600000, loss: 0.000347, epsilon: 0.010000, episode:  941\n",
      "frames: 1938000, reward: 20.300000, loss: 0.000275, epsilon: 0.010000, episode:  942\n",
      "frames: 1939000, reward: 20.300000, loss: 0.000265, epsilon: 0.010000, episode:  942\n",
      "frames: 1940000, reward: 20.300000, loss: 0.000097, epsilon: 0.010000, episode:  943\n",
      "frames: 1941000, reward: 20.300000, loss: 0.000158, epsilon: 0.010000, episode:  943\n",
      "frames: 1942000, reward: 20.400000, loss: 0.000208, epsilon: 0.010000, episode:  944\n",
      "frames: 1943000, reward: 20.400000, loss: 0.000142, epsilon: 0.010000, episode:  944\n",
      "frames: 1944000, reward: 20.400000, loss: 0.000134, epsilon: 0.010000, episode:  944\n",
      "frames: 1945000, reward: 20.400000, loss: 0.000388, epsilon: 0.010000, episode:  945\n",
      "frames: 1946000, reward: 20.400000, loss: 0.000072, epsilon: 0.010000, episode:  946\n",
      "frames: 1947000, reward: 20.400000, loss: 0.000429, epsilon: 0.010000, episode:  946\n",
      "frames: 1948000, reward: 20.400000, loss: 0.000197, epsilon: 0.010000, episode:  946\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frames: 1949000, reward: 20.400000, loss: 0.000184, epsilon: 0.010000, episode:  947\n",
      "frames: 1950000, reward: 20.400000, loss: 0.000644, epsilon: 0.010000, episode:  947\n",
      "frames: 1951000, reward: 20.200000, loss: 0.000183, epsilon: 0.010000, episode:  948\n",
      "frames: 1952000, reward: 20.200000, loss: 0.000117, epsilon: 0.010000, episode:  948\n",
      "frames: 1953000, reward: 20.200000, loss: 0.000082, epsilon: 0.010000, episode:  948\n",
      "frames: 1954000, reward: 20.200000, loss: 0.000134, epsilon: 0.010000, episode:  949\n",
      "frames: 1955000, reward: 20.200000, loss: 0.000211, epsilon: 0.010000, episode:  949\n",
      "frames: 1956000, reward: 20.300000, loss: 0.000120, epsilon: 0.010000, episode:  950\n",
      "frames: 1957000, reward: 20.300000, loss: 0.000085, epsilon: 0.010000, episode:  950\n",
      "frames: 1958000, reward: 20.200000, loss: 0.000061, epsilon: 0.010000, episode:  951\n",
      "frames: 1959000, reward: 20.200000, loss: 0.000074, epsilon: 0.010000, episode:  951\n",
      "frames: 1960000, reward: 20.500000, loss: 0.000076, epsilon: 0.010000, episode:  952\n",
      "frames: 1961000, reward: 20.500000, loss: 0.000334, epsilon: 0.010000, episode:  952\n",
      "frames: 1962000, reward: 20.500000, loss: 0.000338, epsilon: 0.010000, episode:  953\n",
      "frames: 1963000, reward: 20.500000, loss: 0.000074, epsilon: 0.010000, episode:  953\n",
      "frames: 1964000, reward: 20.600000, loss: 0.000125, epsilon: 0.010000, episode:  954\n",
      "frames: 1965000, reward: 20.600000, loss: 0.000091, epsilon: 0.010000, episode:  955\n",
      "frames: 1966000, reward: 20.600000, loss: 0.000089, epsilon: 0.010000, episode:  955\n",
      "frames: 1967000, reward: 20.600000, loss: 0.000198, epsilon: 0.010000, episode:  956\n",
      "frames: 1968000, reward: 20.600000, loss: 0.000051, epsilon: 0.010000, episode:  956\n",
      "frames: 1969000, reward: 20.600000, loss: 0.000589, epsilon: 0.010000, episode:  957\n",
      "frames: 1970000, reward: 20.600000, loss: 0.001086, epsilon: 0.010000, episode:  957\n",
      "frames: 1971000, reward: 20.600000, loss: 0.000193, epsilon: 0.010000, episode:  958\n",
      "frames: 1972000, reward: 20.600000, loss: 0.000220, epsilon: 0.010000, episode:  958\n",
      "frames: 1973000, reward: 20.600000, loss: 0.000106, epsilon: 0.010000, episode:  959\n",
      "frames: 1974000, reward: 20.600000, loss: 0.000136, epsilon: 0.010000, episode:  959\n",
      "frames: 1975000, reward: 20.600000, loss: 0.000268, epsilon: 0.010000, episode:  959\n",
      "frames: 1976000, reward: 20.400000, loss: 0.000484, epsilon: 0.010000, episode:  960\n",
      "frames: 1977000, reward: 20.400000, loss: 0.000448, epsilon: 0.010000, episode:  960\n",
      "frames: 1978000, reward: 20.500000, loss: 0.001175, epsilon: 0.010000, episode:  961\n",
      "frames: 1979000, reward: 20.500000, loss: 0.000085, epsilon: 0.010000, episode:  961\n",
      "frames: 1980000, reward: 20.500000, loss: 0.000219, epsilon: 0.010000, episode:  961\n",
      "frames: 1981000, reward: 20.500000, loss: 0.000066, epsilon: 0.010000, episode:  962\n",
      "frames: 1982000, reward: 20.500000, loss: 0.000255, epsilon: 0.010000, episode:  962\n",
      "frames: 1983000, reward: 20.500000, loss: 0.000367, epsilon: 0.010000, episode:  963\n",
      "frames: 1984000, reward: 20.500000, loss: 0.000113, epsilon: 0.010000, episode:  964\n",
      "frames: 1985000, reward: 20.500000, loss: 0.000222, epsilon: 0.010000, episode:  964\n",
      "frames: 1986000, reward: 20.400000, loss: 0.000069, epsilon: 0.010000, episode:  965\n",
      "frames: 1987000, reward: 20.400000, loss: 0.000097, epsilon: 0.010000, episode:  965\n",
      "frames: 1988000, reward: 20.300000, loss: 0.000144, epsilon: 0.010000, episode:  966\n",
      "frames: 1989000, reward: 20.300000, loss: 0.000166, epsilon: 0.010000, episode:  966\n",
      "frames: 1990000, reward: 20.200000, loss: 0.000127, epsilon: 0.010000, episode:  967\n",
      "frames: 1991000, reward: 20.200000, loss: 0.000050, epsilon: 0.010000, episode:  967\n",
      "frames: 1992000, reward: 20.300000, loss: 0.000605, epsilon: 0.010000, episode:  968\n",
      "frames: 1993000, reward: 20.300000, loss: 0.000138, epsilon: 0.010000, episode:  968\n",
      "frames: 1994000, reward: 20.300000, loss: 0.000708, epsilon: 0.010000, episode:  968\n",
      "frames: 1995000, reward: 19.700000, loss: 0.000107, epsilon: 0.010000, episode:  969\n",
      "frames: 1996000, reward: 19.700000, loss: 0.000085, epsilon: 0.010000, episode:  969\n",
      "frames: 1997000, reward: 19.600000, loss: 0.000039, epsilon: 0.010000, episode:  970\n",
      "frames: 1998000, reward: 19.600000, loss: 0.000166, epsilon: 0.010000, episode:  970\n",
      "frames: 1999000, reward: 19.600000, loss: 0.000110, epsilon: 0.010000, episode:  971\n"
     ]
    }
   ],
   "source": [
    "# Train a DQN agent on PongNoFrameskip-v4 and log progress to TensorBoard.\n",
    "env = make_atari('PongNoFrameskip-v4')\n",
    "env = wrap_deepmind(env, scale = False, frame_stack=True)\n",
    "\n",
    "# ---- hyper-parameters ----\n",
    "gamma = 0.99\n",
    "epsilon_max = 1\n",
    "# NOTE(review): the log saved with this cell prints epsilon: 0.010000 late in training,\n",
    "# which the schedule below cannot reach with epsilon_min = 0.05 -- the saved output\n",
    "# presumably came from a run with a different value; confirm which one is intended.\n",
    "epsilon_min = 0.05\n",
    "eps_decay = 30000\n",
    "frames = 2000000\n",
    "# Use the GPU only when one is actually present; hard-coding True breaks CPU-only machines.\n",
    "USE_CUDA = torch.cuda.is_available()\n",
    "learning_rate = 2e-4\n",
    "max_buff = 100000\n",
    "update_tar_interval = 1000\n",
    "batch_size = 32\n",
    "print_interval = 1000\n",
    "log_interval = 1000\n",
    "learning_start = 10000\n",
    "# NOTE(review): win_reward / win_break / is_win / avg_reward are assigned in this cell but\n",
    "# nothing here reads them to stop training early.\n",
    "win_reward = 18     # Pong-v4\n",
    "win_break = True\n",
    "\n",
    "action_space = env.action_space\n",
    "action_dim = env.action_space.n\n",
    "state_dim = env.observation_space.shape[0]\n",
    "state_channel = env.observation_space.shape[2]\n",
    "agent = DQNAgent(in_channels = state_channel, action_space= action_space, USE_CUDA = USE_CUDA, lr = learning_rate, memory_size = max_buff)\n",
    "\n",
    "frame = env.reset()\n",
    "\n",
    "episode_reward = 0\n",
    "all_rewards = []\n",
    "losses = []\n",
    "episode_num = 0\n",
    "is_win = False\n",
    "# tensorboard\n",
    "summary_writer = SummaryWriter(log_dir = \"DQN_stackframe\", comment= \"good_makeatari\")\n",
    "\n",
    "\n",
    "def epsilon_by_frame(frame_idx):\n",
    "    \"\"\"Return the exploration rate for step frame_idx.\n",
    "\n",
    "    Decays exponentially from epsilon_max towards epsilon_min with time\n",
    "    constant eps_decay (all three are read from the notebook globals).\n",
    "    \"\"\"\n",
    "    return epsilon_min + (epsilon_max - epsilon_min) * math.exp(-1. * frame_idx / eps_decay)\n",
    "\n",
    "\n",
    "try:\n",
    "    for i in range(frames):\n",
    "        # Choose an action epsilon-greedily for the current frame and step the environment.\n",
    "        epsilon = epsilon_by_frame(i)\n",
    "        state_tensor = agent.observe(frame)\n",
    "        action = agent.act(state_tensor, epsilon)\n",
    "\n",
    "        next_frame, reward, done, _ = env.step(action)\n",
    "\n",
    "        episode_reward += reward\n",
    "        agent.memory_buffer.push(frame, action, reward, next_frame, done)\n",
    "        frame = next_frame\n",
    "\n",
    "        # Learning only starts once the buffer holds learning_start transitions;\n",
    "        # until then the logged loss stays at 0.\n",
    "        loss = 0\n",
    "        if agent.memory_buffer.size() >= learning_start:\n",
    "            loss = agent.learn_from_experience(batch_size)\n",
    "            losses.append(loss)\n",
    "\n",
    "        if i % print_interval == 0:\n",
    "            # np.mean([]) yields nan plus a RuntimeWarning; report nan explicitly\n",
    "            # until the first episode has finished, and compute the mean only once.\n",
    "            mean_reward = float(np.mean(all_rewards[-10:])) if all_rewards else float('nan')\n",
    "            print(\"frames: %5d, reward: %5f, loss: %4f, epsilon: %5f, episode: %4d\" % (i, mean_reward, loss, epsilon, episode_num))\n",
    "            summary_writer.add_scalar(\"Temporal Difference Loss\", loss, i)\n",
    "            summary_writer.add_scalar(\"Mean Reward\", mean_reward, i)\n",
    "            summary_writer.add_scalar(\"Epsilon\", epsilon, i)\n",
    "\n",
    "        # Copy the online network's weights into the target network every update_tar_interval steps.\n",
    "        if i % update_tar_interval == 0:\n",
    "            agent.DQN_target.load_state_dict(agent.DQN.state_dict())\n",
    "\n",
    "        if done:\n",
    "            # Episode finished: start a new one and record its total reward.\n",
    "            frame = env.reset()\n",
    "\n",
    "            all_rewards.append(episode_reward)\n",
    "            episode_reward = 0\n",
    "            episode_num += 1\n",
    "            avg_reward = float(np.mean(all_rewards[-100:]))\n",
    "finally:\n",
    "    # Close the writer even if training is interrupted or raises.\n",
    "    summary_writer.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAvwAAAFMCAYAAAC3emhnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAABlR0lEQVR4nO3dd5hU5dk/8O+9sx2WpXcQEERRsSE2NPaC5iXRmGi6r4nxF01MD4mxvEmMxiQajUZj1NhbjIUo9gaCIB3pLLDAUpeyC9t3du7fH+ec2TMzZ2bO9Pb9XBcXM6c+Z2Z25j7PuZ/7iKqCiIiIiIjyU1GmG0BERERERKnDgJ+IiIiIKI8x4CciIiIiymMM+ImIiIiI8hgDfiIiIiKiPMaAn4iIiIgojzHgz3EiMl5ElojIQRH5YabbQ9lLRB4Tkd9nuh1ERNlORGpF5NxMt4MoWRjw575fAPhQVatU9d5MNyaYiDwkImtFxCci3w6aVyYid4vIdhHZLyJ/F5ES2/wjROR9EWkUkRoR+WLQ+t8xpzeJyJsiMtQ2r7eIPC4iu81/twate6qIfGqeKC0XkSm2eSIiN4rIFhE5ICLPiUivZL822UxEhojIDPO9UREZFTR/mIi8KiL7RKRORK6Nsr0BIvKMiDSY7/XTKT0AIiIi8mPAn/sOAbAy3EwR8aSxLU6WAfg+gMUO86YDmATgKACHATgewG8AQESKAbwK4DUAfQFcA+ApETnMnP85AH8AMM2cvwnAs7Zt3w2gEsAoAJMBfENErjLX7QtgBoA/AegN4E4A/xWRPua63wTwDQCnARgKoALA3+I5ePM40i4J77sPwJsALgsz/ykYr/kgABcD+IOInBVhey8B2Anj8zoQwJ8TbB8RERG5xIA/h4nI+wDOAnCf2ct9mJm28YCIzBSRZgBnicjFZtrPARHZau/tFpFRZg/uVea8/SJyrYicaPZ8N4jIfUH7/V8RWW0u+5aIHBKujap6v6q+B6DNYfbnAdyrqvtUtR7AvQD+15x3OIxg+25V7VLV9wHMgRGIW+v+W1VXqmoHgN8BOENEDrXNv1NVW1S1FsAjtm2fCmCXqv7b3PZTAOoBXGpb9xFV3aqqTQD+COArIlIZ9s0IfT2vFpEtAN6P9JqJyP+JyN/MxyUi0iwid5rPK0SkzToREZF/i8hO84rHLBE50rZfp/f9OBFZbF7FeB5AebT2W1R1l6r+HcACh2PsCeBMALepaqeqLgPwou31DV7+fAAjAPxcVRvNdZa4bQsRUaaIcSX6r+bVzu3m4zJzXn8Rec38ndwnIrNFpMic90sR2WZ+/64VkXMyeyRU6Bjw5zBVPRvAbADXq2pPVV1nzvoqgNsAVAH4GEAzjF7r3jB6Y/+fiHwhaHMnARgH4CsA/grgRgDnAjgSwJfNHnWY6/0aRnA8wNz/s4iPmP/sz4eLSHXQdPv8oyKsC9t8OMwPt66bbZfBeH3c+hyAIwBcEOU1+whG8AwAJ8LoBf+c+fwUAGtVdb/5/A2zDQNhXDEJTouxv++fAngFwJMwroD8G0G99eaP1BTEToL+tx4f5bAsAJwMYC2Ax0Vkr4gssD5PRERZ7kYY32HHAjgGxhXj35jzfgqgDsb3+iAY3/MqIuMBXA/gRFWtAnABgNq0tpooCAP+/PSqqs5RVZ+qtqnqh6r6mfl8OYxgMzjg+p257NswThCeVdXdqroNRoB6nLnc9wDcrqqrVdULI63m2Ei9/BG8AeAGM797MABr0HElgDUAdgP4udnzfb7ZZquXfSaME5GJIlIB4GYAapv/JoDpIlIlImNh9D5b8+YCGCoiV5rb/haAQ23z3wDwHbO3vhrAL23tcutWVW1W1VZEfs0+ATBORPoBOAPGlYhhZi/652CcEAAAVPVRVT2oqu0AbgVwjNk+i/99h/HjVALgr2aP+osI6q1X1d6q+nEMx2StdxDG1ZabRKRcRI6HcTIR7vUZDuB8
AB8AGAzgLwBeFZH+se6biCjNvgbgt+bvYT2A/0P3leZOAEMAHGJ+z85WVQXQBaOTaIKIlKhqrapuyEjriUwM+PPTVvsTETlJRD4QkXoRaQRwLYDgYGuX7XGrw/Oe5uNDANxj9g43ANgHo3d3WBztvA3AEgBLYQThr8D4At2tqp0AvgDjisROGD0pL8DoTYGZJnQLgP8A2Ayj9+SgNR/GyUMrgPUwxgI8a1t3L4zc/5+Yx3khgHdt6z5qLv8hjPERH5jTrflu2N+DsK+ZeUKwEEZwfwaMAH8ujPED/oBfRDwicoeIbBCRA+juLbK/j/Z9DgWwzfzxsWyOof3RfA3AaHOfD8C42hDu9WkFUKuqj5g/is+Z652WxPYQEaXCUAR+d242pwHGOLAaAG+LyEYRmQ4AqloD4EcwOmZ2i1H4YSiIMogBf37SoOfPwBikOkJVqwE8COeUGTe2Avie2Tts/atQ1bkxN1K1VVWvV9VhqjoGwF4Ai1S1y5y/XFU/p6r9VPUCAGNgpKpY69+vquNUdSCMwL8YwApz3j5V/ZqqDlbVI2F81u3rfqSqJ6pqXxi9NeOt+eaVkFtUdZSqDocR9G8z/7k+PNvjaK/ZRwDOhnEVZYH5/AIYl45nmct8FcZJyrkAqmEMRgYC30f7PnfAuFJgnz8yhvZHpKqbVfUSVR2gqicB6Afb6xtkOUI/k0REuWA7jE4by0hzGswrrj81f78+D+AnVq6+qj6jqlPMdRXGWDCijGHAXxiqAOxT1TYRmQwjeIzXgwB+ZQ0YFZFqEbk83MIiUioi5TAC0xIzBcQa1DRMRIaK4WQAN8HotbfWnWguXykiP4Nx6fQxc165iBxlrjsSwEMA7rHy3UXkUBHpZ/aMXwSjys/vbds+zkzn6QWjYkydqr5lzutrri8iMgHAXTAu6fpS9Jp9BGOMxSpzAPKHAL4DYJN5CRkw3sN2GCdFlTDSgiL5BIAXwA9FpFhELoVxAuGa+b6VmU/LzOfWvCPMdKlSEfk6jJSdu8Js6mUAfUTkW+b78SUYV4TmxNIeIqIMeBbAb8zU0/4w0kefAgARuURExpodKwdgpPJ0iXF/nLPFGNzbBuMqZ1eG2k8EgAF/ofg+gN+KyEEYX1YvxLshVX0ZRk/Fc2ZqyQoAF0VY5W0YX3anwgjKW2GkrgBG3vxcGGMGHgcw3RxDYPkGjJ7q3QDOAXCemb8OGBVnngHQBKNn+RMYJwyWEwB8BiPN53YAX1NVe/nSXwDYA6P3fQgAe43//jDGCDTDyOd/VFUfsmaKyIMi8mCEYw7g4jWbC6P0p9WbvwrGj8Qs2zJPwLiUvM2cPy/KPjtgDBL+NoD9MAZjv2RfRozKTqdH2EwrjNcXMMZUtNrmXQBgo7ntawFcaDs5Cdi2qu4D8D8AfgagEUY51mmquifSMRARZYHfw0i7XA7jN2UxujuPxsFIB22C8Rv0d1X9EEZHyR0wfmN2wii08Ou0tpooiASm+BIRERERUT5hDz8RERERUR5jwE9ERERElMcY8BMRERER5TEG/EREREREeYwBPxERERFRHivOdAPs+vfvr6NGjcp0M4iIstKiRYv2qOqATLcjk/g7QUTkLNJvRFYF/KNGjcLChQsz3QwioqwkIpsz3YZM4+8EEZGzSL8RTOkhIiIiIspjDPiJiIiIiPIYA34iIiIiojzGgJ+IiIiIKI8x4CciIiIiymMM+ImIiIiI8hgDfiIiIiKiPMaAn4iIiIgojzHgJyIiIiLKY1l1p13KHT6fYu6GvZgyrj8AoMunmLthD04f53hH5wCqio/W1cNTJBjepxJFAhzSrwcAoKndi5cW1+GwQVU4eUy/gPVaOrx4Z9UunH34QGze24JBvcoxoKoM63YdRFV5MYZUVwAA5tbsQWVZMYb1rsCAqrKIbVlQuw9HDu2FRZv3Y8rY/hARqCo+rtnjfz6nZg8mj+6LFdsaMWZAT1RXlPjX/3TTPhw9rBo1u5uweucBjOhT
icMHV2Hr/hZMHN47ZH/vrtqFvc3tmHbsMJSXeAAAe5ra8cmGvTh6WDW8PsWuA20Y2bcSH66rx4CeZZg8ui/eXLETPcuL0bPMg6b2LqgqGls7MbS6Am3eLgzrXYHjRvYBAMyp2YO2zi4AQP3Bdpw2tj/mb9qHnmUeFBcVoaLUg237WzFxRDUOH9wLe5vasb2hDUcPr8bG+iaUeIowom9lQLt3Nrbhw7W7MXXiEPQqL0GwbQ2tWL61AYcNrkKpbf3Glk68tWonDh3QE0u27MeAqjJsa2gFABw7ojc27G5CsacIA6vK0LuyFOMG9cSHa+sxsm8l+vUoxWfbGtHu7cLIvpUY1a8H+vUsw5qdB9C7ohSDq8sBAMu2NmBB7T4M7FWOM8cPwLqdB7GtoRWlniKcPKYfZq2vx+Be5dh9sB0Dq8qweV8LRvXrgbr9LWj3+gAAB9s6MWXsAIgAS7c2oKLEg+2NrTh+ZB/sOtCG8yYMwtyavehVUYL6g+1oau/EuEFVWFS7H50+H3qVl6CxtROlniKIAIN6laOp3Yvmdi98qhhcXYGzxg/AjGXbcdLofhg7sGfEzyUln8+nmLOh+++aiKiQMOCnuDzy8SbcNnM1Hv7mJJw7YRAem1uL3722Cv/4xgm44MjBEdd9Y8VOfP/pxQHTau+4GADw/acXY9a6+oBplpcWb8NvXlmBX1w4Hne+uRb9e5Zh4W/Oxfl3z/Ivv2jzfnz14fkAgL49SrH4pvPCtmNHYysuf/AT//M7Lj0aV0weif8u34EfPrsEv//CUThiSC987eH5+H9nHooHPtyAY0b0xqvXnQbACIK//I9PcMnEIXht+Y6Q7Qe3f3tDK77zxEIAQHmJB9OOHQYA+N1rq/Dq0u0RXzM3au+4GPM37sXXzON3u84X/z4XW/a1oPaOi3H2Xz5ybPvJt78HAHhxUR1e/H+nhmznB88sxuItDQHbBYBnF2zBHW+scd0eT5Ggy6eO80b1q8SHPz8LF/51NoqLBDV/mAoAmHb/nLDbO31cf8xev8fl3tfg2BG9sXRrQ8iccQN7Yv3uJpfbcfa3K4/DjS+vwB8vO5oBfwY8NX8zbn51Je776nG4ZOLQTDeHiCitmNJDcdm8rxkAsONAGwBgZ6PRa7t5b3PUdXc0toWdt2TL/rDzGlo6AAAH27wAjJ7xYLsOdG97X3NHxHY0tHQGPN990NjedrMHesu+Fv8+1u08CMDoTba0dBjtWLGtMeJ+LFa7gx9v2dfian03dh4I/9qGE8v+F4d5f+zBvl1LR1dMbQkX7ANA7d7udnojLGe3vC7yezPj+tPwr6tO9D9vbvc6LhdPsD/EvAJh2Wt+lorYu5wRW8zPz46G2P9GiIhyXcIBv4iMEJEPRGS1iKwUkRvM6X1F5B0RWW/+3yfx5lK2EBhBi6oReFWY6SmtHb6o6xYXhQ94Oru61/cFBXWdXeo4Pdz60Xi7ArdTWWocg9U869gAoEtD92kFbi5jzwDWyQIAJDP88zm00/W6Lg4k1mPt8rl/P9zQGI+vsbUz4vzR/XvgEFv6Uiyfn2j69SwNaovxnhd7GPATEVF6JaOH3wvgp6p6BICTAVwnIhMATAfwnqqOA/Ce+ZzyhBUUW0FiuRkst3Q695AGrBsh4LcH4cG9uF4zeLTyrp10drkPCDuDgtHKUiPDzTqZ8Wn38TkFulbAHy5IjBScNrV393wnM584kXi1I4nBriX4pCpRkd77ePQoLUbPsu7Mxo4kbr9vj8DxIwfajJMP9vATEVG6JRzwq+oOVV1sPj4IYDWAYQCmAXjcXOxxAF9IdF+UPSSod7vS38MfPYUjUg+/PcgPTu+wgvnIAX/8Pfw9yoxjEH8PvzGI2HgcGrhavf6tnc7HHHySYG9bU1tqevgT6VFPScAfz+WPCOypUMlQVCToYQ/4k/ga9K0MHNxsXW3wRPj8ExERpUJSB+2KyCgAxwGYD2CQqu4AjJMCERmYzH1Ras1aV49+PUvx
74V1uPT4YTh8cC+8vKQOl58wAv9ZXIcnPqkFAMxYth1fPG6Yv3c8Us52Q0sHnvxkM+r2tzruLzj94sZXPkOv8hKcPKYfSjzi7321p8M88OEG/+PVOw7gxpc/C9jGx+v3YPSAHnjjsx3Yuq8FI/pW4pRD+2HXgTaUFXsCln18bi0mjerrP5l5YeFWNHdYAX/3cq8u3YbZ6/egxEzNaAsT8H/94fkY1KsMRSI46/CBGNanwj/v0TmbsPNAK44cWo2Fm8OPW4jFyu2NWFAb27Yenr3R/9jeu/38gi247PjhWFbXiLLiwH6BFdsacdSw6ojbbevswoxl2/HIx5tiak80t/53pf/x395bn5TBr1YqFwDsaYo87iMWfXoEpvS8uKgOQOQT3lwkIhcCuAeAB8DDqnpH0PzDAfwLwPEAblTVP7tdl4iIkiNpAb+I9ATwHwA/UtUDbtMUROQaANcAwMiRI5PVHErQNx/91P/4sbm1+OE543Dve+uxdGsjnv10i3/e0q0N+N6Ti3DF5BEAIve+v7NqF/7yzrqo+7O8tHibf/8A8PWTjc+H/aTij292V4C56J7ZIdv4+iPzMaCqDPUHQwf4PmYbrAkYA0/vfmcdJgzpBaC7dx8I7J2/4bmlAeu1dTof8ycb93Yfy5JteO6akwPmz/xsJ2Z+ttNx3XhcfO/HMa/z+9dX+x/bj/GX//kM7V4fbn51Zcg6l/zt45AqPsH+MHM1nvhkc8ztieZ1WzWkcJ8ly5gBPbCxPvog8lSVaBw/qMpxej6l9IiIB8D9AM4DUAdggYjMUNVVtsX2Afghgq7yulyXiIiSIClVekSkBEaw/7SqvmRO3iUiQ8z5QwDsdlpXVR9S1UmqOmnAgOg13CkzrAojG+tDq5XU1DdFrK5iaUswP7rTa+wj1oGbTsE+4JxfvqepHU7xWLhe/FgkMz88FYLbl0j6zPagSii1d1yMOy49OmDa9WeNdVz3lxce7n/8xeOGxbX/08b2w/s/PRObbp/qavnpFx3uOP2eK47F984YE/P+Lz1+GA61XX14xSzlCuRdSs9kADWqulFVOwA8ByOd009Vd6vqAgDBI6ijrktERMmRjCo9AuARAKtV9S7brBkAvmU+/haAVxPdF2WeU756a0eXP+UlUjCeaMBrpdfEmhZe6nH+mHsd8t27fOrYA9scY3lJJ9ke8AdfnQlO5YmFU7Wg4EA33ODtipLu/ZbEWdHGeg/d9t6H+4wUFxWhNM7XwX549tcyzwL+YQC22p7XmdNSvS4REcUgGT38pwH4BoCzRWSp+W8qgDsAnCci62FcsmVuZo6IFLQ7Dcpt83b5B7BGisUTDXitVJ5YS09WlHocpzulH3m71LGH/2Bb5PKObrR5Ez9pSKXg8RclYYLgYE6fF6fBusGBbriw175qscs2BIs1bSZcUF8k4U8GYmF/LfMs4Hc6GLd/oK7XFZFrRGShiCysr6933TgiIjIknMOvqh8j/G/3OYlun9LPqbSlNcVpUK6qraJOhJ/6RAN+K6c+1h7+cL3EjgG/z+f4YT7Qmnh1mFhvQpVuwTedchvwO6VzOVULCgn4w3xr2E/oSuIMjmNdLVJQH28Pv/1r0f4Z9ORRDj+MXvkRtufDAbi9bbTrdVX1IQAPAcCkSZOSW/qJiKgAJLVKD+W+f87aiDPHhx9Lsa0htMIOAPzmlRUAgE17mnHTKyswtHcFxg3siY/W1eO7p4/ByH6VCd/U6NNN+wAYFX1iEe4E4U7bgF/Lgtr9KC4KDfDCld6MxaIYK+ik29cenh/w3OlOxpZrn1yEsw4fgJNG98ObK0MHHi90OFa3Pdv2CwZuTzqCJdLDX+opCijPGW8b7OzHnmc9/AsAjBOR0QC2AbgCwFfTsC4REcWAAT/5bWtoxW0zVydUSnHVjgNYteNAwLQeZcWYftHhKanz7saw3hXY1xxabjFcCUZ7dZ1ken7h1ugLZZG7IlTBeXPlTry5cieG96lwLLPqdPXE
Tc/2+RMGBfTwx5rSM3ZgT9Tsbop4czfLX79yrP9xuW3cQFlJYMAf3MMffEJwzPBqHDOit78q0aEDeuD7Zx6KYb277+Cbryk9quoVkesBvAWjtOajqrpSRK415z8oIoMBLATQC4BPRH4EYIJZzS1k3YwcCBFRnktKlR7KD11mKs++luTVIgcArxkcZWrQaq+KYvQJuglSLhhY1X2n1ievnhxwR9hkCh6ce+PUI1yv29TuPtXJTRD+22lHBVyRsafCfPzLs/ypN89dczL+duVxAev+5LzD8LPzxxv7su3q6imjHff1BVsFIPvNt6rCvM5XTh6J2jsuxrrbLvJPm/2Ls/Dq9VNw7ecOBQD0Ki/Gez89E2MHVqGi1ONvhz3Id/M65BJVnamqh6nqoap6mzntQVV90Hy8U1WHq2ovVe1tPj4Qbl0iIko+BvwUIlWBeaZ6+Ns7fTlZ+9w+2LhnWXFAL3QyBQ+wLQ8zyNlJLANa3dxwqkiCcvg9RY6Pi0TQszw0MLfWtb/fbuJr+8lU8HatvwenqkVW739xlGpCJbY0sXy78RYREWU/Bvzkp66La8THzYlEIqUgw2n3+sIOEM1mFSWBAX/wnYGTJXjQbXkM70EsA1rd9GyLCHw+e0qPc+67pwghVzxUbQF/jD3q9m2V2153RfcNyZyO1TrhscZ9BP8FWc89HvsJSA5+GImIKKcx4Ke0yVzA35Wyu6mmUpkt8OxRVoyyFPXwBystLnLdCx1LwO++h7/7ub1n3P64SAQ9Sp16+Lvn25eNxp7SEzxI1/rcOlV7KjGPP9qhFefvoF0iIsoBHLRLfpHK2z8zf0tC297Z2IYZy6JX6yst9gBIvASm3bpdoXcHzgX2G1BVlnpQGUOqTSKKRBzr6DspcahoFI6bQbueIglI6bH3zgf3klc5pPRY9wSwx+Zu9mu/mhKcpmS9FqWe0NffWlbMEpzBexIYvfz2IJ8pPURElG4M+MkvlQk9i7cYZRpLPOJY599SGuedVfPNlLH98b3PjcG8jZ8CAKorSnDflcfjH7M2YFS/Hrj9jdCSoonoXVmChhbj5mKxBKSdDvX2g/3pSxMBuE/pufr00bjnvfUAAgP34F7yIdXluGTiEJxwSB98tq0RV00ZhVJPEb50wnD84sLx/mW/e8YYbG9sxVsrdqK5owuVpR7c99XAAb+9K0tw1vgBaGjtxBFDevkrNZ1zxECcNrY/tu5vwVVTRoW01+r171VRjKunjMalxwfeKPaV607Da8t3BLQ93wbtEhFR9mNKD6WcSHc1lx+ePS5k/pSx/f2P472zquWy44cntH4m1d5xsf/x3V85Fn0qSwEAA6rKICIY1b8Hbr90Ir5nVoRx6/DBVbjhnNDX3e6iowb7H8cSkDrdedlywiF9UHvHxbh8knFvJbcpPb3KS/CNkw8BgIBUrOIi8Y8zKRJBsacI9331eFx12mjc9eVj0au8BOUlHvz58mMwsKrcv151RQnu+vKxqK4wKjW99oMpOPvwQQH7FRH866rJePn7p8H6CP7qosNRVuzxr9+rPLTSk9U+EcFNl0zAkUOrA+ZPHN4bv556RMBx5NmNt4iIKAcw4Ke0aGozAv5qh/KY9jzwRNMdSotzO5iyUj+Ki8Sfe55oh7B9MGs49jSWWALSSDckC96OmxMJ65i7q+3YthfQS+66iSH7z3QOfab3T0REhYcBP/lplKAwEc1mD79TL6l9oG608obRJOOuqJlkBclFReIPDJNR1SVqwG97DzwxvAdtkQL+oMDWzVatdfyDb23bEBF/rnw8veTFQdvOFAb8RESUbrkdHVFOEBE0tXtRWlzkWEveHvB74um6tcn1gN86fE+R+FNLkhPwR55fEmcPf1tn+Bz+eAJba9fWyacEnSb4U3ri2La1TleUcQdW+k2qzgsY8BMRUbrldnRESZXKjs+mdi96lhU7Bq/2+vKJpvTkesBv5e2rqi0/PLFtisTYw5+kgDQ4KHdTGjVSSo/T
crGwcvgzjXX4iYgo3XI7OqKkSmFGD9q9PpQXFzmm7JQmMaXHXuVn8qi+CW3L8vj/To5p+aHV5fjB2WMDKrZMGdsfl0wcgge/foLjOv++9hQAwHPXnIybLpmAqvISf097ovGhKgJuZuUkFQF/8Mmb01Yf+sYJuPyE7oHW3QF/9/OZPzwdd1x6tLmN+FN6HvjaCfjFheNx6ICeEZdzs+UXrz0F9155XPQFHcRyd2IiIqJkYFlOSjmBcTdXj6d7IGqfyhLsN8tAliVh0G51RQkaWzsDAtefnn8YvvLQvPgbbvrcYQNwzIjeWLa1wdXyZSUe/PT88Vi36yBeWrwNYwf2xFPfOSniOieaJyeH9OuBq6eMDpiXjpSegEG7Lt+D8pKiiCk9btp9/pGDcf6Rg/HvRXXmOsZ0q4dfBJgwtBcmDO0VuO04YubB1eX4/pljXS8f6QR4UgInk7HcrIyIiCgZ+MtDadHZ5UNJUVF3FRpbgFluu+lRvMGtlfNdEkfg6kYsFx6cbr4Uj+60lsSPoyuGHn63+yuOEnUHd2S72aw/f97hjrmB205hWkyKM24Y8BMRUbrxl4dsUpfT0+VTcyCqEU3Ze5TjCTaDWfGsPeBP5g2OMpF3bb0bydh1tApM9vfD7VWWaIsFv2bBA3Aj8Z/shPmGSsf7oSn6e+Cg3cxaVteAt1buzHQziIjSigE/pZ4AXp+i2FPkz70usXWZJ6PH0+rBLimOr9pMNDGdPCRpt5rEHv6YqvS4PNZoywWfY8RyGNF6+FMZ8MdyYkK557XlO/C9JxdluhlERGnFgJ/8Ujlo19vlQ7Gth9+e0tPXrEwDxJebDQBdZuPtg3aTm9LjfltDqysAdI9NGFxdHjC/Z5m7oTOlHiPVaWjvCtf7DidalZ6yktivsozs1yPi/OAecnvqVjT9ehqfiapy59eKveRERETuMeAnv1jjfatyCgBcf9ZYjB9UFXZZo4df/L289rSRL9mqtFi9qwOqygLW/7//OTJiW5xy+OPpBb7nimMdp8cSYN73VaN6yyH9euCeK47F34Kqubxxw+l45FuT8NoPpkTczsh+lbjnimNxb5g2xcIK+H907jjc8vkJIfPPOGyA/3FwpaQbpx6B8yYMAgCcNb57ubu+fAz+fPkx/udn2uYBoSeQYwf2xN1fOQbfPnVU1Pb+8sLDcedlE3HW+IGO89MR76fyBJiIiCidGPBT3KZOHIJhZu/z1KOH4LqznSugCATeLkVxkaDLLOoSkLdfJLjixBEB64wNKp141LDqiG3xp/TYc9HjKPE57dhhjtNjOXfobbtiMe3YYQHPAWBE30qcc8SgqMcUbv14WPeaGtyrHBcfPSRg3rDeFQF3QA4+UfruGWP8vfNfOK779elTWRpwsvb5iUMD1nOKl7943HD0clEPv7zEgy+fOCJs7f5kjs8IxjL5RESUbxjwk1+sPZr2wLDYIxEHe3b5FMVFRfCakWe4G2RZmwzuUY9Wu9xx0G4SI7dcTyEJqPgT5VDcDtoNfk1KgsZipLKHPJnjM6gwtXR4sbepPdPNICJKCwb85BctzztYkQQOLI0UKHb6fCj2iL+nOdyy4e4u63Zgb2lxanL4c/3uqNYJkUj0Y4l30G5pyBWV1EX8uf5+UOZdfO/HOOH372a6GUREacGAn/zi6eG3VikukogpNEYPv/h7+MMF8NYWQnqPXabnBPbwu1rFFbcBZrbGofYe/mhNdJsuE3zSFnzVJlploESk8nW2Nh2tlCnltk17mjPdBCKitGHAT36x1h23B8GeIgl7I6YHP9qA5XWN8BQVoY+Zj35YmAG+1iaDA+xwKUDBUpXS4/bkYeLw3knbZ7KMH1yFQ/pVAgAG9SoPmxdvcZsuE+09SkXAPHG4Me4hlSlWI/oar1UyqiNR9uDpGxEVMnf1AakgxN7Db6uXXhQ5pQcweumPGdEbT119EiaP7ouzDx+I6qABnPYt/Hrq4fjDzDUAjADvpe+fikv/Pjdg
+e+dMQZXTh6JM//8obmP8PXkZ//iLLR7u3DuXbNiOErnbQV78dpTsG5XU8iA2FSZ96tzsL+lAxfdMxsAUFHiQWtnl+Oyf7xsIko8ghMO6YMp4/pjf3NHwPzg+L6oyNj+ybe/F7ENwa9J8BWeVNxR9pFvn4ia3U2uTwDjccWJIzC0dwXOGNc/qdud9fOz/Fe4iIiI0okBP/nFl9JjpYpED4qt+VPMQOq0saEBlb33+biRfWzTgeNtzy2j+/fAqP7d9eBLIwT8Vs9tPKJdLRhcXY5Jo/rGvf1YDa4uD6jvf+b4AXhjhfPdQytKjQo7p48zymZG68AXCAZXl0VeCKFXPYIHVsdSd9+t6ooSnHBI6OcgmUQEnztsQPQFYzSyX/yfPyIiokQwpYf8Yk7psUV8Agm4mZYTN72yEvaxc5Qa3GJ7L3NSU3qinMxkehBpLGUqo6X0uN1U8HaC3//y4uQH/ERERBQ7BvzkF88gS/tVgWgpPW7yru0xpD2gdBtP2wf3ui0v6Ua0TWW6bGcsZSqDFw1J6Ynz5CV4rfISfr0QERFlA/4ik188gyytNUSi3+jKXaUd5yDfbQxqHziczJszRQuoM12dJ5ZDjbZovMcSfKKQipQeIiIiih0DfvKLp4rF6WY+fkWpJ2rPcKT5R5vVV8YMMPLxVTUgMK1wGTzaTzpi7XUfN7Bn2HnHjewdcd18SumJNv+Y4YF3CLZ68vtXGRWYRvQ1qtvEUrGob4/E7yZMREREzjhol/xi6eGf/YuzAAC3X3o0bjhnHHqVl2CHtAEAeleW4JFvTcJlD3wSsE6kFJuvTh6Jk0b3xZZ9LXho1kYAgTfhqiovcVwveIux1OF/80en48K/GlVu5k4/G70qQvdx55cmAgC+fvIhOOXQfv4KP+/8+AwMqCrDsb99x9xXfAH/wt+cm5SThRLblY1Bvcrw2FWT/RV8ggW/LsHjI6K9bs9892Q0tHb6n7/z48+ho8uHIdUVeP+nn8OIvpWo3dOMcWFKrzr54Gdnoi1MlSEiIiJKDAN+8oslo8eqeFNW7MEh/YxeeStQ7N+zDCccElqxJlLPsYhg7MAqbNnX4p9mbe+QCNV1Qgbt2qLVaIH04YN7+R+Hq7ne2zwJsNpnsYLZXuXFONDmjfsmX/17Rq+G44bHdmVjQFWZ48mLJdqtt6L18PcoK0aPsu6vDnv1ozEDjKsksQT7gFF9J7hEKxERESUHU3rIL9Eb01iBoi/MmUOsKTbR7wkbyl4pJhlZNl0uRzLH09Zksp/o+HyR8/SjvS4ZHn9MREREScaAn/x88ZTpsbECyXBXCmIO+OMIPO0Dg5MRhHcm+Jqki32wsiKxMQWZHo9AREREycWAn/wSDW2LovTwpyOODKjSk4T9ebty486o9sHKqhrxtY564y3G+0RERHmFAT/5uc3hryx1rphjVVr5/MShjvNjqRUPuAs8QwftusvhP2xQ+Io8duMHB+aiGwOIu/PXLzthOACgLIM153tXlgSk9Fx89JCI1zaCX5eQuvwOa5813rjzbPDrEY/J5h2JJzmM8yAiIqLk46Bd8nNbpWfJzec5Tq+uKMGK/7sAlWFKaMaa0uMmtSS4xW5u1rXmdxe62vaSm85Dn6Bykat/e2HAdm+6eAJ+ev74jNWcX/M7oz33v18DAPjKpBG4/uyxqG9qD7tOtCN3epsuPX44zp0wCL3CVEuKxZRx/bH81vOTsi0iIiKKjgE/+blN6SkrDh/c9iwL/5GKVv0ldPmYFne9P7fBuVOlm+B1i4ok4jGnmtUea7Byj7JiiEjE8QvR3odwJ0PJDNAZ7BMREaUPU3rIL44b7cYk5pSeDFe+yaVqNdbVE2v8RKS2B88KeZ5Dx01ERETRMeAnv3CDbZPFE+OnLdOBZ6xXJDLJGrtglRGNfM+DyNvKpeMmIiKi6Bjwk1+qC1DGGkjmUg97pnnM6kRd5klb5Dr8fGGJiIgKCQN+8nM7aDdesQ7ajT68tNsXjxsW
1xWBQwf0wNHDqmNfMctYPfw+fw+/+3V5AkCJEJELRWStiNSIyHSH+SIi95rzl4vI8bZ5PxaRlSKyQkSeFZHy9LaeiKgwcNAu+WVdDn8Mi9/9lWNx91eOja1BAN776Zkxr5ONrJMpf0qPebLEWJ5SSUQ8AO4HcB6AOgALRGSGqq6yLXYRgHHmv5MAPADgJBEZBuCHACaoaquIvADgCgCPpfEQiIgKAnv4yU9TnNQTa/DJWNW9kuCUHv5lU3pMBlCjqhtVtQPAcwCmBS0zDcATapgHoLeIDDHnFQOoEJFiAJUAtqer4UREhYRhAfmlvIc/xpQeppq456/S44uewx+MrzIlYBiArbbndea0qMuo6jYAfwawBcAOAI2q+nYK20pEVLAY8JOfL8UBv5ubXVF8iq0qPeZ7aJ0s8RWnFHP6iIXcD89pGRHpA6P3fzSAoQB6iMjXHXcico2ILBSRhfX19Qk1mIioEDHgL0BdPsXDszeirbPLP23tzoN4e+XOlO63iGV3UiZcDz+vklCK1QEYYXs+HKFpOeGWORfAJlWtV9VOAC8BONVpJ6r6kKpOUtVJAwYMSFrjiYgKBQP+AvSfxXX4/eurcf8HNf5pF/x1Fv69qM5x+d6VJRg/qCrh/cY6aDeZLjt+uOtlb/n8BIzqV5nC1iTf2IE9MbCqDBdPNFKjY3qpwyx75eSROG/CoMQbR/lsAYBxIjJaREphDLqdEbTMDADfNKv1nAwjdWcHjFSek0WkUowz03MArE5n44mICgWr9BSg1g6jZ7+xtdPV8ktvPh8AMGr66wntN5Md/DddcoTrZa86bTSuOm10CluTfIcP7oVPbzzX/7woCSk9t196dIKtonynql4RuR7AWwA8AB5V1ZUicq05/0EAMwFMBVADoAXAVea8+SLyIoDFALwAlgB4KP1HQUSU/5IS8IvIowAuAbBbVY8yp/UF8DyAUQBqAXxZVfcnY3+UGCvwTvWddUP2m8GIX5jNTpQSqjoTRlBvn/ag7bECuC7MurcAuCWlDSQioqSl9DwG4MKgadMBvKeq4wC8Zz6nLGDldXf50rvfTKb0FFqZyphuvJW6ZhAREVEWSEoYpKqzAOwLmjwNwOPm48cBfCEZ+6LEWQM8U31n3WBFGQy6C7VCUIEeNlGIcH8KvlSXJyMiygKpDMEGmQOzYP4/MIX7ohhYP3xdaf6hy2TQXahxL1OZiCL72/s10RciIspxGU90YH3l9LMC73R3bCWjROTPzj8srvUKrYe/1FOEqUcPxr+uOtFx/pcnDcdPzjNeS5bupELQ1O51nD5/0940t4SIKP1SGfDvsm6fbv6/22kh1ldOv6IMpfS02+r+x+v6s8fFtV4m04kyQUTw96+dgNPG9necf+eXjsFFRw1Oc6uIMmdDfVOmm0BElDGpDINmAPiW+fhbAF5N4b4oBpmq0tOWhIA/XpkcMJytmLlMRERUGJIS8IvIswA+ATBeROpE5GoAdwA4T0TWAzjPfE5ZwH9X1jRHfC0d0QP+VJ2DFFpKTyz4ylAh2NHYlukmEBFlTFLq8KvqlWFmnZOM7VNyiT+HP70Rf2sMPfzJzivP5D0AiCjz6va3ZroJREQZU2CZzQR0p/SkOd6Htyu2HVoherEn/MeUcXz80v3+ExERUWYkpYefckuR/8ZbsUV8//r2idjT1B7T8lc9tgAAcPkJw3HN58a4XldVcUi/Snz/zEPx5UkjHJe59Lhh+MJxw1xvk5wx24mIiCi/MeAvQFbPbqwpPWcdHtutFOzL3/bFo1FaHP2Ckj34FBH84sLDwy5767QjUVbsialNRERERIWGKT0FqMsM9NM5aLc4Bbk37JhOjLJODxERUUFgwF+ArPr76azDn4pBs7xhFBEREVF0DPgLkJXK08VRmwVNeI2EiIioIDDgL0A+n/F/rIN208HKye9VURJ1WYariWFKDxERUWHgoN0CZPXsZ2NKzKmH9sNvLj4CXz7RuTKPXbjmP/KtSejf
syzJLctf7OmnQrZuV1Omm0BElHIM+AuQlbtvpdX7sqinX0TwndPdle8MF6iec8SgZDYpbzGjiwgxlRomIspVTOkpQFZ8b4XLLTHcATebZOEFipzE15GIiCi/MeAvQL6glJ7mdm8mm0NEREREKcSAvwBZKTxWSs/BNgb8hYgpPURERIWBAX8BuunVlQCAd1fvxqjpr6N2T3OGWxQfpqIkprLUqIh0SL/KDLeEiIiIUomDdgkLNu/LdBPiwuoyiRnVvwf++c1JOHlMXwDAOz8+A+1eX4ZbRURERMnGgJ/Q0NwZMu3Iob2wcvuBDLTGPfbwJ+68Cd0VjcYNqspgS4iIiChVmNJDaGjtyHQT4sJ4n4iIiCg6BvyEhpbQHv5ckI03DiMiIiLKNgz4yTHgL2IwTURERJQXGPCTY0pPLsT7OdBEIiIioozjoF1KaWWWOdPPTllgngsnJURERESZxoCf0OH1QSQ1N2Ia1rsi+Rs1MYefiIiIKDqm9BA6u3woLgoMnhlKExEREeUHBvyEzi6FJyjgZ74MERERUX5gwE8AAA8DfCIiIqK8xICfAISW4WT4T0RERJQfGPAXoP49y0KmiQALbjw34Hk+WH7r+fjs1vMz3QwiIiKijGGVngJU4gmN5kUEA6q6TwTyJN5Hr/KSTDeBiIiIKKPYw1+AnMpv5kuPPhEREREFYsBfgBShEX9wDj8RERER5QcG/Hmsw+vDp5v2AQDW7DyAPU3t2LK3BbsOtIcsGxzu86ZWRERERPmBOfx57I431uDROZvw+g+n4OJ7P0bvyhI0tHQ6Lhsc4GdzuP/lScPxwsK6TDeDiIiIKCewhz+Prd99EACwp6kDAMIG+0BoDr/1/OZLJqSkbYn442UTseEPUzPdDCICICIXishaEakRkekO80VE7jXnLxeR423zeovIiyKyRkRWi8gp6W09EVFhYMCfx6y8fJ/PYZRuyLLO00PuwJsFRCQr20VUaETEA+B+ABcBmADgShEJ7iW4CMA48981AB6wzbsHwJuqejiAYwCsTnmjiYgKEAP+PFZsBsVdLgJ+QXBKDwNqIopqMoAaVd2oqh0AngMwLWiZaQCeUMM8AL1FZIiI9AJwBoBHAEBVO1S1IY1tJyIqGAz481iRFfA71eEMXjZk1G4KGkRE+WYYgK2253XmNDfLjAFQD+BfIrJERB4WkR6pbCwRUaFiwJ/HPDGk9Lg5KSAiCuLUNRD8ZRJumWIAxwN4QFWPA9AMIGQMAACIyDUislBEFtbX1yfSXiKigsSAP49Zee5eFwF/W6cv4Dk7+InIhToAI2zPhwPY7nKZOgB1qjrfnP4ijBOAEKr6kKpOUtVJAwYMSErDiYgKCQP+PGal9Phc9N5r0DIsw09ELiwAME5ERotIKYArAMwIWmYGgG+a1XpOBtCoqjtUdSeArSIy3lzuHACr0tZyIqICwjr8ecxjBu1uBu0yo4eIYqWqXhG5HsBbADwAHlXVlSJyrTn/QQAzAUwFUAOgBcBVtk38AMDT5snCxqB5RESUJAz481hRDFV6gq8CsEoPEbmhqjNhBPX2aQ/aHiuA68KsuxTApFS2z41HP96E/50yOtPNICJKGab05DH/oF0X3ffhzgmY2kNE+e63rzGTiIjyGwP+PBbLoN2QHn4G+kRERER5gQF/HrMC/k6vL8qSoTn8VsDP3H4iIiKi3MaAP48l1MPPHH4iIiKivMCAP48Vmd30nV2xB/wWpvYQERER5TYG/HnMn9LT5SKlJ+g5A30iIiKi/MCAP4/5U3rcBPzM1SciIiLKSwz485iV0tPhIqWHiIiIiPITA/485jHfXTc9/OEws4eIiIgotzHgz2PWjbfcVOkJJua6vDZARERElNtSHvCLyIUislZEakRkeqr3R92KYhi0G4w9+0RERET5IaUBv4h4ANwP4CIAEwBcKSITUrlP6uaRBAJ+RvxEREREeSHVPfyTAdSo6kZV7QDwHIBpKd4nBfEmMGiXcT8R
ERFRbkt1wD8MwFbb8zpzGqWBlbrfGU8Of5LbQkRERESZkeqA3yluDIg+ReQaEVkoIgvr6+tT3JzCouZL3emNv0oPEREREeW2VAf8dQBG2J4PB7DdvoCqPqSqk1R10oABA1LcnMJi3Uwrvhx+9vETERER5YNUB/wLAIwTkdEiUgrgCgAzUrxPMqkZ8XfEEPBbcT7DfSIiIqL8UJzKjauqV0SuB/AWAA+AR1V1ZSr3Sd2s3KmOGFJ6BKy9T0RERJRPUhrwA4CqzgQwM9X7oVBuUnqKpHtwL2Cm8qiyLCcRERFRnuCddvOYz0VKT7En8CMgDo+IiIiIKHcx4M9jblJ6yosDPwJFwV377OonIiIiymkM+POYldITKeCvKPUETrAG7VpxvjKjn4iIiCiXMeDPY1aVns4Id9qtKAkM+CXofyIiIiLKbQz485g/pSdCDn95cMAf3MNPRERERDmNAX8e89fhjyGlhzn8RFSIdja2ZboJREQpw4A/j/lc5PBv2tMc8NwK70f2rQQADKwqS0XTiIiyio/jlYgoj6W8Dj9ljn/QboSUnoaWzoDnYvbof/eMMTj10P44c/yAlLWPiIiIiFKPPfx5TM0s/i5f7D1XHhGcdfhA/wkAEVE+W7a1IdNNICJKGQb8eSyeK9Tdg3YZ6BNR4fh/Ty/OdBOIiFKGAX8e0zgifob5RERERPmFAX8ei2cImtWzz8CfiIiIKD8w4M9jsVSdKCs2PgpWJg/rVRARERHlB1bpyWNu4/35vz7HH/BbdfhZoo6IiIgoPzDgz2NuQ/ZBvcr9j61UHsb7RERERPmBKT15LK5Bu/6UHkb8RERERPmAAX8ei68sJ5P4iYgot7V1duHWGStxsK0z+sJEBYABfx5Ytf0A9jd3YOu+loDpcQX85v9x3KuLiCinxXNVlLLTc59uwWNza3Hve+sz3RSirMAc/hy3btdBTL13tv957R0X+x/HM/B22rFD8c/Zm9CznB8NIiLKTV3mz5+XvVdEANjDn/N2HWgLOy+er7npFx2B5beej55lDPiJKDoRuVBE1opIjYhMd5gvInKvOX+5iBwfNN8jIktE5LX0tZqIqLAw4M9j8Vyd9hQJepWXJL8xRJR3RMQD4H4AFwGYAOBKEZkQtNhFAMaZ/64B8EDQ/BsArE5xU4mIChoD/jzGfFQiSrHJAGpUdaOqdgB4DsC0oGWmAXhCDfMA9BaRIQAgIsMBXAzg4XQ2mgoHfwaJDAz48xi/54goxYYB2Gp7XmdOc7vMXwH8AoAvRe2jAiXRFyEqKAz4c5wEfa0daOvEnJo9WLJlP7Y3tGaoVURUIJziquC+BsdlROQSALtVdVHUnYhcIyILRWRhfX19PO3MOS0dXoya/jqenLc57DINLR3o7OK5EhFFx5GZOU6Cfkp/8MwSfLTO/Q/i1VNGJ7lFRFRA6gCMsD0fDmC7y2W+BOB/RGQqgHIAvUTkKVX9evBOVPUhAA8BwKRJk1J28VI19Ds1U+oPtgMA/jlrI75x8iEh86f88X3U7W/FF48bhru/cmyaW0dEuYY9/HmmZndTwPPR/XsEPH/w6wEFMvDrqUekvE1ElLcWABgnIqNFpBTAFQBmBC0zA8A3zWo9JwNoVNUdqvorVR2uqqPM9d53CvbJWd1+4wrujGXB51cE2O4azyR+IgDs4c97wb1VJZ7Ac7yiLOnNIqLco6peEbkewFsAPAAeVdWVInKtOf9BADMBTAVQA6AFwFWZai8VDv60EQViwJ9nioKu2QR/6RUFnQFItly/JqKcpKozYQT19mkP2h4rgOuibONDAB+moHlERASm9OQdT7SAnvE9EVFBm3rPbFz2wNxMN4OI0og9/Hlme0PgnXd9QbcVZ7xPRFTYVu04kOkmEFGasYc/z3QElWjbuKfZ/3hgVRmOGd47a6pQEBERpRKH7BIZGPAXkLd/fAb69CjFptsvznRTiIiIUobj0yjTDrR1orG1M9PN8GNKTwHhFyARUXjB
vcFrdh5Ae6cPx4zonYnmUBKwKidlysRb3wYA1N6RHZ2sDPhzXCwhvIc1OImIXLvwr7MBZPYHW5mUEhf2b1G2UNWQDldvlw/FnvQm2TClJ9fF8KXGeJ+IKLt1eH1obvdmuhl5gydMlGmvLg28Od7SrQ0Ye+MbmLWuPq3tYMBfQIJr8BMRUXb5+sPzceQtb/mfS5ReHX6rO+PrQtmisbUTC2v3+U/kF2zaBwBpD/iZ0lNA7PH+XV8+BvM37stcY4iIKMSntfxeJsont8xYCQA4f8IgPPTNSRlrB3v4C4i9h//S44fjj1+amMHWEBFllw/W7IZm+ShPVUWX7f4qXp/iuU+3ZLBFRORGpu9/wYC/gATfhZeIiLp954mFeH/N7kw3I6JHPt6EQ389M2Da9Jc+y1BriChWf3hjdUb2y4C/gDDeJyKKbE9Te6abEGL9roP+xy8uqstgS3JPll+woQKyraEVQOY+kwz4Cwjr8BMRRTd3wx7U7W/JdDMAAFv2teC8u2fh3VW74lr/4dkb8dbKnUluVQ4wf+8Y71O2CA700x2ScdBujlmxrRGPza3Fzy8Yj1teXYkvnTDccTlPkQTkeRIRUXS//I+RHlOa5hrZ0azf3YRzJwzCmp0Hoy9s8/vXjfSBbLn5T7qwe4uy0fMLusfb/HP2JnT5gJs/PyEt+2bAn2O++8RC7Ghsw64DbZi9fg/KSpx/lDwi6DL7NmZcf1rayz8REeWyji5fppsQIhvTjYjIPatDwfLonE1pC/izqwuDorIq7Vj1XCtLnc/Z7JeKJg7vjevPHpfythERUaDWji58/+lF2NHYGtN6TvX3n1+wNVnNAgBsrG9K6vaSafeBNhx585tYub0xoe0wh5/IwIA/RzWZAX+PUo/jfN5ki4go895YsQMzP9uJO99cm9B2UvGVvvNAW/I3miQfrN2N5o4uPD63Nq71+RNIFIgBf46xvsQaWzsBAJVlzj38niJ+2xERJdMvX1yOl5ekp0qOOgw3/fPbiZ00EFH2+dt769OyHwb8OcbquW9oMQL+kjCBPeN9IqLk2N/cAQB4fuFW/Pj5ZXFtY+ZnOxK+qRfTU4jyz1/eWZeW/TDgzzFWD3+71xhQFu77v3dlaXoaRESU52qSkOve7vXhrZW7MG/jXmzZm/ySn/9dtj3qMnua2nHB3bP8z53GCWQbnuRQIdjZ2IZbZ6xEu7crZftgwJ9jgnPzfWG+DW+/9Oh0NIeIiCKwf2Xvb+nAFQ/Nwxl/+iD27USZ/4Nnl0Tdxn+XbcfaXbGV9UylPU3tmHbfx44DmhM9Gelen2cMlP1Ovv09PDa3Fo9+XJuyfTDgzzHBX4Hhej+qK0pS3hYiIorsnThvmBUsH2+r8u+FdVhWZ9xbJtmsEy1eIaBc0taZpT38InK5iKwUEZ+ITAqa9ysRqRGRtSJyQWLNJL+giD9cTigrFBARJUciX6czP+u+y+3+lo64t/PHN9ck0AoiKnSJ9vCvAHApgFn2iSIyAcAVAI4EcCGAv4uIc/1IikloSo+75YiIKD7J+jq1l+ZM9Z3QP1y7G7ujlN1M5c/EnqZ27GuO/wTHkuir9J/FdVi0eV/C7ch2B9s6sbA2+49zeV1DTrQzU55fsBUH2jpTsu2EAn5VXa2qTnXCpgF4TlXbVXUTgBoAkxPZFxmCq++Ey+FnvE9ElL02722OON8qzBCvb/9rAS59YG7AtHT+LEz6/bs4/nfvuFvY6WcsSY3t7FJc9sAnydlYFvt/Ty3Glx78xH+Pnmz1P/fNwZcezP/3I147D7ThVy99Fn3BOKQqh38YAPstAevMaZSg4IFM4TqJcqH6AhFRbkj/9+n5d8+KvlAUdfu7B8PeOmNlUoavLqzdh1HTX8eGJFQuitgxlWBjC+0X8LNtxh2JvV2JnShS5u1tak/JdqMG/CLyroiscPg3LdJqDtMc/3xF5BoRWSgiC+vr6922u2B1BfXoh+vhZx1+IqLkeHf1Lry3OjmDby3p
Hkv62NzakB/meH4mXl1qlP+cU7Mn7DK7kngHX/6UESWH821abVT13Di2WwdghO35cACORYJV9SEADwHApEmTOJ4+iuC8T1+YLn6m9BARJccDH25wnK6qeGvlLpw3YVBO3t38k417MWZATwyoKkvqdjsdepmb2r3o6lJUV3ZXkHNTQYdBAVFypCqlZwaAK0SkTERGAxgH4NMU7augeH2BX6TBPf7dcu/Hh4goF1gdLy8v2YZrn1qEx+MoK9nSHlp+b+u+5N+Qy66zK/D34q/vrsflD84Ns3RynXTbuzjmt287zwz6uWps6cSqHQcS2h87vYgCJVqW84siUgfgFACvi8hbAKCqKwG8AGAVgDcBXKeqqSsuWkC6gr6ww8X7OdjZRESUE+55bz0AYPdBI9c2nhSWh2ZvBGBUs7Hyrk+/M/YbctmpKuZv3ItbXl3hON/K87ar3dsCVcWT8zZjf4xVdTbvbcb3n17k6u6gzR2hy/hTUoN+xy7/x9yU1OYnKmSJVul5WVWHq2qZqg5S1Qts825T1UNVdbyqvpF4UwkI/dIMV/ZM2L1BRJQS95oBv5OXl9Rhyh/fD3uPFMt/l21HS4cXk37/Lm6esTIp7XpxUR2+8tA8PP7JZsf54X4WVu84iJteWYEfv7A0pv395pUVmPnZTszf2F1m0dvlc33zoD+9ZRT56whKAVq3KwkDggvsKne0zxvljlS9lVFz+Cl7PDN/CxpbA+uzzljmODSCPfxERCnU1O4N+WFeWLsPP35+GQDA61OUeCJ/EbeYHTjPzN+CZ+ZvSbhNK7fHlwZjBdyx9vA7ufrxhfhoXXcBjk827MUph/aLuI5Tzr+FcWxsCu1Eh9xLVQ4/pcArS7a5XpZ/9EREqXPULW+hodUIkB/+eBMAxFxf/Cv/SG898maHcQOxUlv+jVMwbg/2AeDKf87Dos37/c9fWlyXcBtyzYxl2/FZXWg6VTLxvIiiYcCfQ9pjqK/LjB4iotT6x0dGHr7TXXPd9ExvqI98861kezdMaVHr5yKWoNH+ExPt98ZeV/wnLyxDa1BqqtMA5nzyw2eX4PP3fZyenfG3n8JgwJ9DOmO48yIDfiIicsP6vYg1fUbj7FcOXu+lJdscT5oSEvQbePsbq5O7faIcw4A/hwQPbIqEg3aJiNIn3D1R0inRr/14A/i5G/aioaUDD5uVh9x4bM6mgOf3hBkI/Z8kpQBZV2PSLdL4hFR48pPatO6PcgcD/hyxdV9LTF8cDPeJiNJnzK9nZroJcQ9wtcZ8xdzDby7/wIcbcNVjC/DMp+4HHt/631UBz+dv3BvbzqOI9hvY4fXhiJvexKtL3Y+Ns3tr5U6s3G7k5S+o3Ycpf3wfze3ekOV+99qqkGmp9Oe316V1f6nwyYa9uOG5JQVbeShVR82APwfM3bAHp9/5ATbvdX9TliL28BMRFZR4e5OT8XNRs6spbKRyoC00EI7FqOmvY1tDa0LbsLywcCveXrkTe5vb0drZhdtnrolrO997chEuvtfIy7995mrU7W/Fmp2BVZK6fIonwpRITbosjo2n/2c5rnjI/QD1bzwyH68u3Q5vFlw1yycM+HPA2p0HY16H8T4RUWGJ93vfqqwTrkN11PTX8dv/rsLBtk7sb+50XgjhY87dB6PfmCxaaLewdl+UJdz5xYvLcc2Ti5KyLQA4+y8fhg1M3wszSDrfqWpA7/xzC7Zi3sbkvH+FIFXhGwP+PMV4n4jSQUQuFJG1IlIjItMd5ouI3GvOXy4ix5vTR4jIByKyWkRWisgN6W89Ad03wIrk0TmbcPStb+P1z3b4p9lPEA62e8PecfjON6NvP5qP1++Jaflo49iSlS2ysb4Z9eYdly974BPMXt9dlrSzqzB7qI+65S2c85ePMt0MCsKAP09x0C4RpZqIeADcD+AiABMAXCkiE4IWuwjAOPPfNQAeMKd7AfxUVY8AcDKA6xzWzVnxDoBNRKL3X4mlxX+YuQZb9gWmmbZ0uCuv
OeHmt2LYk+Hfi1JTvz8ZP5X2k4efvLAs8Q3G0wbb4w/W7M5IGwDg8gfnormjCxv3hJacdXOlh1KHAX+eYrxPRGkwGUCNqm5U1Q4AzwGYFrTMNABPqGEegN4iMkRVd6jqYgBQ1YMAVgMYls7GU6BYBkm2dnYlLa/e2Hn0RZJZujN4S94uH3798meo2+88Vm7Gsu14w3Z1w41M/Q5v3tuMD9buRrvX+QSs3duFV5ZsS2hQ7KLN+7DP4c7MC2r3OyxtmHzbe662XZjXRVKvONMNIGc+n2Lexr3oUsWyrQ0xr894n4jSYBiArbbndQBOcrHMMAD+6ElERgE4DsD8lLSyQDw5L00DRFPgUxc5+ufd/RHe+fHn4CkSNLd7sX53E44d0Tuu/T300QYA3ScRn9buwzPzt2BjfROeu+aUkOV/+OwSAEDtHReHzHO6mqOq2NmYmR7txVsacOt/V+G8CYMwvE8Fbrp4AoqKuqOCu99Zjwc/2oAN9U04elg1zj9ycMz7uOyB7kG4hw+uwvPXnILqypKktL/QpeqEhwF/lnpy3mbcMmNl3OuzSg8RpYHTF03w71XEZUSkJ4D/APiRqh5wWBYicg2MdCCMHDkyvpZSVGt2HoS3y4diT3Ze/N9Y34zXlm/HkUN74TevrMC8jfuw/Nbz0as8NNCM9gv4uFk9Z/fB9ihLRmfvKLf2++1/LfAPhk43q+f9nVXGoOEvHjcME4f39s+3xlr87f0aAM4nMeG8t3oXhlRXBExbs/Mg3l61E5dPGpFIs/1iiV7eXLEDZxw2AJWlDGejyc6/asLG+ibH6WMH9nS1PuN9IkqDOgD2X/nhALa7XUZESmAE+0+r6kvhdqKqD6nqJFWdNGDAgKQ0PB1WbXc8f8lqH6zNTJDq1tZ9LTj3rln+qi8d5h3om9q9Aek4Mf8GuuxWdVv61CnYT8XN2Tq8Pizd2pBQek5Lh/uyqVc/vhBT750dMv3nLy6Pe//xWrGtEdc+tRg3vRJ/52ghYcCfYypKPK6WS3TwFhGRCwsAjBOR0SJSCuAKADOClpkB4JtmtZ6TATSq6g4xKgs8AmC1qt6V3man3ryN+xwDo2yXzDz5WLkJWu96x/nGUpc/+Amm/PGDhNsQ7bfTaVCuvdWRTjSWbA2f3x6v215fhS/cPwfNtgHTwSlG6bp/VTzpx07cNvegeX+HcOMuKBAD/hxjz8OLRPjOElGKqaoXwPUA3oIx6PYFVV0pIteKyLXmYjMBbARQA+CfAL5vTj8NwDcAnC0iS81/U9N7BKnzrUc/zXQT4pS5gP/5BVujLhPcOiuYXb0j+tWUP4cpP6rqvqbSf5cFX8ByH1CnIvBe4XAVaU5Ncu9a7Na0++ckZTvWSef/PrbA1fIc5OsOk55yjMdlxz3794koHVR1Joyg3j7tQdtjBXCdw3ofg19VWauxtRMn/O4d3Pz59FVK/XBtPQ4bXBVxmeCgeU9TOwZUlbna/n0f1DhOP+NPH2DrPqPikFMP/Xcejxx4JpJO48bSrQ34wv1z8Mp1p8U1SHlvc+RxCqlqfkuHN6Hc+tnr96Dd24WyYufMBv97xYjfFfYDZ6HFW/ajJkwOf3GRu7eMdfiJiChWVvA3p2YPvD7Fza+mNz/60r/PjWn5NTude/Zj+Qm0gv1w3l0dWNc+OOd9r6085a4D7VixrdH9zl2w6uo71dd3c5g3v7oSo6a/jl+//FlS2xVN/cF2bNmbunQb69g7unwpP+my3PHGGixNUupSOFv3peY1Y8CfhS79+9ywl+TcxPuTR/WFy8wfIiIiv9q9LWjt6ML3n16c9n1/FkegrIqw9ebjMXfDXjS1Rx7EevafI99F9pK/fZxwO1Q1JIgNDmlXbT+A+qboVYasgcbPzN+ScLtisaOxDWf8KfFxFdEs3dqAx+bWpnw/APDgRxvwhSSlLoWzI0XlXBnw5xg35TZfuPYUDtolIqKYdfl8+H9PL8rIvuO5kZcqMP43bya1HZv3ht4l
1m7ngdTX1x/9q5n4+iPz0dbZ5Xi1YteBNky9dzY2u+hBD+78tiobWdzeITlWD8/eFHZeW2fi+7RnMsxwGFtBgRjw5xiP20G7jPeJiChG76/ZjVkZqh8fD1+YVI586PSaU7M3sDfZdqwHWjtdb8deeGnFtka8HnTH4BNvezfuNkby7updYedddE9yK1ilqxJRLmPAn2Pc3lCLAT8REcVq8ZaGTDchJuHivGz9DYw1P3vNzoMJn7zssaX9LNniXBr0zRU701qSddOeyFdR3LC/x+t3HUxaWdBYvLJkGxZtjn6X6GzAgD/HuM3Nz4feDSIiSr8MluKPnUNbm9u98HZl30G8sHArTr/zAyysjS9A1DCP492G3bVPLcKDH20AAOw+2IYnPqmNcw/uWYNfV2xrxKjpr2OBw+vitue+uaMraWVBY/Gj55fisgc+Sft+48GAP8ewh5+IiMjgVEH/yFvewk//HXqDrEx7damRZ16z27kKXzjW73mqB91uN8dQfP+pxbj51ZUhvfDxjLGIxEpXmr1+D4DIKUDB1u866G8vucOAP8e4Lbfp9sSAiIgoV6UidztVV8jbzcGyTk1etf0AVmxrhM+neHlJneP69vKf8R53pJKhe5s60NnlQ4M5PuCsP3+IVtuA3tPueD++nabAeXfPwg3PLc10MwL86qXP8OIi5/cuGzDgzzHuU3qIiIjyW7QSmtnIKVifeu9sXPK3j/HvRVvx4+cDr044/Z67vzdwoBcWhg9I31y5E9PumxNQDnTW+twZwJ1pz366BT/LwitLFgb8OYYpPURERIbfv7466dt8ct5mAMDOxjas3uF8Y69UsffiW1qTUMLSrVU7DmBDfXcqT7pDiR2NgWk66U7baWr34gfPLsE+h/fBLp0DnJOFAX+OcXmjXd5pl4iICk4yArFnPzVy5U++/b2kl48EgLr9LRg1/XV/eo09bcYpnejvH24ImZauMpRpTQ9WYPHmhoBJZ/8l8k3OEtXU7sXXHp6H9bsOAgCe+3QL/rtsO+7/oCbiet99YmFK25UKDPhzDAN5IiIiZ7fPTH6Pf7K9t3o3AOCr/5wHwOhVt7hJ2739jdUpORFxko6Qo6XDSMv6x6yNeHHR1tTv0LR250EcdctbmFOzF79zcaXInur0/prdqWxaSjDgz6D3Vu/y9yS4xcG4REREzh7+OPzdXbPFWrM3+UBb6PiDaOk776zahX98tDEl7XKS6phDVdFou4nYB2udxww0tXvxl7fXorPL5zg/HsvqGvyP3aRu7W9xf7OzbFSc6QYUsqsfNy4JXTl5pOt1ov3pDe5V7n/8rVMOwUVHD4mnaURERAXrN698lrZ92WPqv767PuKyaU8lEeCjdfX4OEWDd/81pzZqXLNiWyP+s7gO/5pTixF9K/HlSSOSs3NbWlT9wfbwy5m+9vD85Ow3Qxjw55hwl/vOPWIQ3l29C9d+box/2v9NOypNrSIiIsofT81Lbc17u2y+bn/Ds0scr0Qky4xl23HM8OqIy1zyt49xxYlGkJ/uG6o1tnRixrJt+PrJh6R9AHeyMeDPMdEurzHHn4iIiJIhlcE+YNxt17rjbiRW+ny0EKfD68OuA20Y0bcy+jYjlDa19vezF5fhnVW7cPTw3lG3l+2Yw59rwnzYGecTERHllhcWbsVuF+kkhc4KzqOFOje9sgKn3/kBGpOUb2+V5/RGGTswp2ZPUvaXSuzhz2JFAgRXGPOFKTnGeJ+IiCi3/OLF5ZluQk5w28Nv3SisucOL6sqSiMu6uaOyz9xxtP3mQn4/e/hTqMun+PrD8zFv496QeXX7WwKev7p0G6bdPwejpr/un1Ze4glZzxsu4GfET0RERHnIinzcBOn25SMvE32p7hON3A+yGPCnUP3Bdnxcswc3PLckZN4/ZwWW1brhuaVYFpTHVlYc+vaEG7Di9o+AiIiIKJc0tJh3vo0S6uxobANglPscNf11jJr+Oh5wuHFZOPcF3XDLqr3vNH7S
3kFrt84su5ptGPCnUKSzRzdni849/M55ZHlw8klEREQU4l3zZmXxhDp/fHMN2r2h9zcIvlvx7PX1aDBz/634bWN9c8z7Pf/uWXG0MvUY8GcxpvQQERERxeZfc2oDno//zZsAgOc+3YL1YXrgv/HIp/7HbZ1dqN3TjIPtRpWifLjpKQftplCkNBv7h0eDTzNNsaT0EBEREeUzEcGizfuiLvdImDsuT3/JuKFa7R0XR1x/Q31zQPWkPIj32cOfSr4wgTwQ+OEJ02nvHPCHSemJsCsiIiKinCcAvmnriU/pvvIgyLdjwJ9CXWYkv+tAOyb9/t2AefbPUWeY+q5OKT2dYXr4qyuM8lM9y3jRhoiIiPKPCNDcEZqP78aanYF3yo3WT2qP02JN6QlXQt0t/yDlJGLAn0Jdtjd8T1N7wAfA/tnpcvhgVJZ6cNVpo0Omt3U6f9CvO2ssfjvtSHzxuGEJtJiIiIgoO9UncJOyC/862/WyB1oDb9xVFGO0PObXM2NbIcjT87cktL4TBvwp1BWUZ9NpS8exV+lxGoj7q6lHYMKQXiHTW8Kc2ZYWF+Gbp4xCUVGeXYMiIiIiAnD7G2vSsp81Ow8GdMymu/T5lr0t0ReKEQP+FAruue/w2gJ+23SnWzZ7RBzzx5rNEePBmMNPREREFN1db6+NukxbZ3dslu6+1EhjQOPFgD+FIgb8tmjeKaWnSODYW98apoffzR3jiIiIiArdve/XRO0o/eOb3VcT0n2n3VTsjgF/FD6fhs2bjyY4kG9s7YTPp2hq9wa8mZ1OAX+ROJ5RtsTZFiIiIiJyZ3ldo/9xuiv2pKIEe16WdFm/6yDOM+90ZtVaHTX9dVwycQju++rxAICfvLAULy3eBgAY3b8HPvjZmY7buu6ZxXh/zW4su+X8kKo5rR1dOOLmN/Gbi4/Ad04fEzDvpy8sw38W1wVMO/svHznu47Q73g+Z5hFxHBU+bmBPrNkZetOIUg/P3YiIiIjcuHXGStfLfrS2PoUtCTVj2Xbc9ZVjk7rNvIwSP611vinDa8t3+B9bwT4AbNrTHHZbb6zYiXavD/sdSiTtaTJGiwff0Q1ASLAfq8pST8gZ5X1fPQ5Pf+ekkGX/73+ORL+eZQntj4iIiKhQdIQpie5kxbbG6Atlubzs4U+FpjYvUB04rd1rpNc43SArUT3KikN6+C+ZONRx2dPG9kv6/omIiIgIeGnJtugLJVGsdf9dbTORlUXkTyKyRkSWi8jLItLbNu9XIlIjImtF5IKEWxonTdJI5yaH6jhWicySFKTT9CjzuH7D0z2YhIiIiIhSIxsH7b4D4ChVnQhgHYBfAYCITABwBYAjAVwI4O8iEnrb2BSxx/hdPk1K0O8U8FvTSlPQw19e4nFd9TUVgzuIiIiIKP1S0cOfUEqPqr5tezoPwJfMx9MAPKeq7QA2iUgNgMkAPklkf+HUH2zH1v0tGDewJ6rKSwLm1e5tQactT6v+YLtjGcxV2w+gf89S//qeIkGbt7sizs7GNhxs64TPB3h9PrR0dPlz//c2taNmdxNG9q1EU7s35A5t8Sgrdn9+1NzhXJufiIiIiHJLKur+JzOH/38BPG8+HgbjBMBSZ05LiSc+qcXf3q8BYFTlsZ8YnXtXYGWcE29713EbU+/tvuVyiUdw4qi+mLthr3/az19cjp+/uNxx3e2NbSH7SVTfHqXweCK/430qS7C/pRO9gk5yiIiIiCg3Od2HKVFRA34ReRfAYIdZN6rqq+YyNwLwAnjaWs1hece8ExG5BsA1ADBy5EgXTQ7Voyy5Y487uzQg2E+GKyePxFWnjULtnmZc8+SikPn3XHEsbnhuKQDgtR9MQd8epQCAd39yBs69a1bAsp/86mxsrG/GyWP6Yd2ugxg7sGdS20pEREREmZGRlB5VPTfSfBH5FoBLAJyj3cnydQBG2BYbDmB7mO0/BOAhAJg0aVJcyeg9bQF/h9cHh4ydhJw4qg8W1O5PaBtTjx6MwwZV
4bBBVY7zzz1ikP/xUcO6ywGNHRi6/JDqCgyprgAAHDGkV0LtIiIiIqLskXWDdkXkQgC/BPA/qtpimzUDwBUiUiYiowGMA/BpIvuKxB7wN7d70el1X1vVjf5JqHEf7SqEJxUJW0REKSYiF5rV2GpEZLrDfBGRe835y0XkeLfrEhEVoqwbtAvgPgBlAN4xS0POU9VrVXWliLwAYBWMVJ/rVLUrwnYSYg/4m9q9Md1MwY2q8sRThqqiBPzFDPiJKMeY1dfuB3AejCu7C0Rkhqqusi12EYxOn3EATgLwAICTXK5LRFRwUhERJlqlZ2yEebcBuC2R7btl7z1/f81urNlxIGXbT9U22MNPRDloMoAaVd0IACLyHIwqbfagfRqAJ8yUz3ki0ltEhgAY5WJdIqKCs7e5I+nbzIs77Q6uLvc/vmXGyri2ccSQXljtcKIwZkAPDO5V7rBGdD3Liv21+ntXhlbSqSovRnunDz5V/82zRvatdNzW0Or42kBElELDAGy1Pa+D0YsfbZlhLtcFkJziDqnSv2cZRIySz5NH9cWkUX3wrVNHocPrQ3VlCWr3NGN5XSP69ijFqYf2Q+3eFhwzvBptnT60dXZh9c4D6F1RisMHV8HrU8zdsAcVJR6Ul3jQ2tmF6ooSDO1dgaVbG/CtRz/Fw9+chOYOLz5aV4/DB1ehorQYf/+gBj3KilGzuymkfaXFRZh2zFD//WK8XYpXlm5De5JTX4koeb56UvK/5yRZd6JNhkmTJunChQvjWnfL3ha0dHr9N6GqLPWgo8uHihIPepWXYG9zBzxFgi6fD2XFHjS2dqKq3AjIh1RXoEeZB9v2t6Kt0/gS9Kmi2CMY0suYt353E0QAMS+09CjzoMPrQ2VpMdq9XagsLcauA20o8RShqrwYLR1dGN6nAlv2taCqvNg/yBYwxhkcaOtEWbEH5SVFZnuLsftgG8qKPaiuCDw5aG73wlMkKC9J273LiCgLicgiVZ2U6XZYRORyABeo6nfM598AMFlVf2Bb5nUAt6vqx+bz9wD8AsCYaOs6SeR3osPrw7pdB7Fky35MGNoLPctKMH6wcyEFokxTW2eg0zwA/vmRls019mMJjlE7uxQiQImnyL+cqvpvuCoCtHt9KC/xON50VdVYprNL4TPnqwKdPh9Ujdixy6fo7PLBa+6ry6dobu9CaXERqitKsK2hFS0dXlSVl8Ajgi41lu/XoxTtXp9/u10+RUtHF/r2KMWepnb0Ki/BAfN+TkVFxn4bWjpRWeZBuRmX9utZikFV5ah26CR2I9JvRF708APAyH7OPeOWPmaZS8sIh2XGDAhf3tJNNZwBVaGDe52q8vQoK3ZM8RlY5dyLn+yyo0RESeKmIlu4ZUpdrJtUpcVFOGpYdUAlNKJsFSmAD56XL8E+EHgswcdVWhw6T0QCqtpYnaNOr4k1yb4dAKhAd4dqiQchHaz9bOFhPKXQB8WZKZJMCVXpISKigrYAwDgRGS0ipQCugFGlzW4GgG+a1XpOBtCoqjtcrktEREnArmMiIoqLqnpF5HoAbwHwAHjUrNJ2rTn/QQAzAUwFUAOgBcBVkdbNwGEQEeU9BvxERBQ3VZ0JI6i3T3vQ9lgBXOd2XSIiSj6m9BARERER5TEG/EREREREeYwBPxERERFRHmPAT0RERESUxxjwExERERHlMQb8RERERER5jAE/EREREVEeE6NEcnYQkXoAm+NcvT+APUlsTq4oxOMuxGMGCvO4C/GYgfDHfYiqDkh3Y7JJnv5OZGO7srFNQHa2KxvbBGRnu7KxTUB2tiueNoX9jciqgD8RIrJQVSdluh3pVojHXYjHDBTmcRfiMQOFe9yplq2vaza2KxvbBGRnu7KxTUB2tisb2wRkZ7uS3Sam9BARERER5TEG/EREREREeSyfAv6HMt2ADCnE4y7EYwYK87gL8ZiBwj3uVMvW1zUb25WNbQKys13Z2CYgO9uVjW0CsrNdSW1T3uTwExERERFRqHzq
4SciIiIioiB5EfCLyIUislZEakRkeqbbkywiMkJEPhCR1SKyUkRuMKf3FZF3RGS9+X8f2zq/Ml+HtSJyQeZanxgR8YjIEhF5zXxeCMfcW0ReFJE15nt+Sr4ft4j82PxsrxCRZ0WkPB+PWUQeFZHdIrLCNi3m4xSRE0TkM3PevSIi6T6WbBXtd0AM95rzl4vI8W7XTWGbvma2ZbmIzBWRY2zzas33eqmILExWm1y260wRaTT3vVREbna7bgrb9HNbe1aISJeI9DXnpeS1cvq7DZqfic9UtDZl6jMVrV2Z+ExFa1PaP1Pmth1ju6Blkv/ZUtWc/gfAA2ADgDEASgEsAzAh0+1K0rENAXC8+bgKwDoAEwDcCWC6OX06gD+ajyeYx18GYLT5ungyfRxxHvtPADwD4DXzeSEc8+MAvmM+LgXQO5+PG8AwAJsAVJjPXwDw7Xw8ZgBnADgewArbtJiPE8CnAE4BIADeAHBRpo8tG/65+R0AMNV8zQTAyQDmu103hW06FUAf8/FFVpvM57UA+mfotTrT+u6Ndd1UtSlo+c8DeD8Nr1XI320mP1Mu25T2z5TLdqX1M+WmTZn4TJnbdoztUv3Zyoce/skAalR1o6p2AHgOwLQMtykpVHWHqi42Hx8EsBpGkDQNRnAI8/8vmI+nAXhOVdtVdROAGhivT04RkeEALgbwsG1yvh9zLxhfTo8AgKp2qGoD8vy4ARQDqBCRYgCVALYjD49ZVWcB2Bc0OabjFJEhAHqp6idqfPM/YVun0Ln5HZgG4Ak1zAPQ23xNU/UbEnW7qjpXVfebT+cBGJ6E/SbcrhStm8ztXgng2STsN6Iwf7d26f5MRW1Thj5Tbl6rcDL2WgVJy2cKiBjb2SX9s5UPAf8wAFttz+sQ+sLlPBEZBeA4APMBDFLVHYDxwQEw0FwsX16LvwL4BQCfbVq+H/MYAPUA/iVGKtPDItIDeXzcqroNwJ8BbAGwA0Cjqr6NPD7mILEe5zDzcfB0cvfZiPS6puJzFet2r4bRo2dRAG+LyCIRuSYJ7Ym1XaeIyDIReUNEjoxx3VS1CSJSCeBCAP+xTU7VaxVNuj9TsUrXZ8qtdH6mXMvkZyootrNL+merOO5WZg+nHNa8Kj0kIj1hfBB/pKoHJHzabs6/FiJyCYDdqrpIRM50s4rDtJw6ZlMxjEuPP1DV+SJyD4w0j3By/rjFyFmfBiNtpQHAv0Xk65FWcZiWU8fsUrjjLJTjj4eb1ybdr6vr7YrIWTCCsym2yaep6nYRGQjgHRFZY/ZYpqNdiwEcoqpNIjIVwCsAxrlcN1VtsnwewBxVtffcpuq1iiZr/1bT/JlyI92fqVhk5DMVHNsFz3ZYJaHPVj708NcBGGF7PhxGWkBeEJESGB+Ip1X1JXPyLvPSDsz/d5vT8+G1OA3A/4hILYxLVWeLyFPI72MGjOOoU1XrLP9FGCcA+Xzc5wLYpKr1qtoJ4CUY+af5fMx2sR5nHQIvz+f68SeTm89GpNc1FZ8rV9sVkYkw0henqepea7qqbjf/3w3gZSQvfS1qu1T1gKo2mY9nAigRkf5u1k1Vm2yuQFDqRQpfq2jS/ZlyJQOfqagy8JmKRdo/U2FiO7vkf7bcJPpn8z8YPaMbYfQSWgMYjsx0u5J0bAIjT/evQdP/hMDBfneaj49E4GC/jciRQY1hjv9MdA/azftjBjAbwHjz8a3mMeftcQM4CcBKGLn7AiOP/Qf5eswARiFw0G7MxwlgAYwBXNag3amZPq5s+OfmdwDGuCD7ILhP3a6bwjaNhDFG49Sg6T0AVNkezwVwYRpfq8Hovk/PZBhpd5LJ18pcrhpGTnaPdLxW5jYD/m4z+Zly2aa0f6Zctiutnyk3bcrgZ8oxtkv1Zytpb3Qm/8EYzbwOxsjlGzPdniQe1xQYl2qWA1hq/psKoB+A9wCsN//va1vnRvN1WIscr+CBwIA/748ZwLEAFprv9ysA+uT7cQP4PwBrAKwA8CSMIDfv
jhlG79EOAJ0wemiujuc4AUwyX6sNAO6D+QPKf86/AwCuBXCt+VgA3G/O/wzApEjrpqlNDwPYb/t+X2hOHwPjh3wZjJPipP6uuWjX9eZ+l8EY+HlqpHXT0Sbz+bdhDGi3r5ey1yrM322mP1PR2pSpz1S0dmXiMxWxTZn4TJnbDxfbpfSzxTvtEhERERHlsXzI4SciIiIiojAY8BMRERER5TEG/EREREREeYwBPxERERFRHmPAT0RERESUQSLyqIjsFpEVLpf/soisEpGVIvJM1OVZpYeIiIiIKHNE5AwATQCeUNWjoiw7DsALAM5W1f0iMlCNm4SFxR5+IiIiIqIMUtVZMG4C5icih4rImyKySERmi8jh5qzvArhfVfeb60YM9gEG/ERERERE2eghAD9Q1RMA/AzA383phwE4TETmiMg8Ebkw2oaKU9hIIiIiIiKKkYj0BHAqgH+LiDW5zPy/GMA4AGcCGA5gtogcpaoN4bbHgJ+IiIiIKLsUAWhQ1WMd5tUBmKeqnQA2ichaGCcACyJtjIiIiIiIsoSqHoARzF8OAGI4xpz9CoCzzOn9YaT4bIy0PQb8REREREQZJCLPAvgEwHgRqRORqwF8DcDVIrIMwEoA08zF3wKwV0RWAfgAwM9VdW/E7bMsJxERERFR/mIPPxERERFRHmPAT0RERESUxxjwExERERHlMQb8RERERER5jAE/EREREVEeY8BPRERERJTHGPATEREREeUxBvxERERERHns/wOH36gL7dIiFAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 1440x360 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def plot_training(frame_idx, rewards, losses):\n",
    "    clear_output(True)\n",
    "    plt.figure(figsize=(20,5))\n",
    "    plt.subplot(131)\n",
    "    plt.title('frame %s. reward: %s' % (frame_idx, np.mean(rewards[-10:])))\n",
    "    plt.plot(rewards)\n",
    "    plt.subplot(132)\n",
    "    plt.title('loss')\n",
    "    plt.plot(losses)\n",
    "    plt.show()\n",
    "\n",
    "plot_training(i, all_rewards, losses)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
